{"cells":[{"cell_type":"markdown","id":"sufficient-resource","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"9C881BFEF296447B85A4A4F93152CEC0","trusted":true,"collapsed":false,"scrolled":false,"mdEditEnable":false},"source":"### 一、导入数据"},{"metadata":{"id":"0F573894522348CFB19E7ED0469E8439","notebookId":"60b34773e77c4200173bea40","jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"trusted":true},"cell_type":"code","outputs":[],"source":"# 导入需要的库\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport warnings\nimport datetime\nimport seaborn as sns\nsns.set_style(\"darkgrid\")\nwarnings.filterwarnings(\"ignore\")\n%matplotlib inline","execution_count":null},{"cell_type":"code","execution_count":6,"id":"comic-catalyst","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"459992A387BF426F91F4B5715E2012EB","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# 读取之前数据集成数据\ndata=pd.read_csv(\"../input/eco_part.csv\",index_col=\"Unnamed: 0\")"},{"cell_type":"code","execution_count":7,"id":"communist-inside","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"EC93B3AB2E6840C294CB58C63246A9B3","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"        Month  Manufacturing_Industry_Index  Manufacturing_YOY  \\\n0  2021-02-01                          50.6             0.4174   \n1  2021-01-01                          51.3             0.0260   \n2  2020-12-01                          51.9             0.0339   \n3  2020-11-01                          52.1             0.0378   \n4  2020-10-01                          51.4             0.0426   \n\n   Nonmanufacturing_Industry_Index  Nonmanufacturing_YOY  Current_Month_FDI  \\\n0                             51.4                0.7365                NaN   \n1                             52.4               -0.0314                
NaN   \n2                             55.7                0.0411                NaN   \n3                             56.4                0.0368                NaN   \n4                             56.2                0.0644              118.3   \n\n   FDI_YOY  FDI_Comparative_Rate  FDI_Total  FDI_Total_YOY  ...  \\\n0      NaN                   NaN        NaN            NaN  ...   \n1      NaN                   NaN        NaN            NaN  ...   \n2      NaN                   NaN        NaN            NaN  ...   \n3      NaN                   NaN        NaN            NaN  ...   \n4   0.1832               -0.1704        NaN            NaN  ...   \n\n   New_Investor_Acct  New_Investor_Comparative  New_Investor_YOY  \\\n0              160.9                   -23.15%            79.74%   \n1              209.4                    29.13%           161.60%   \n2              162.2                     6.20%           100.40%   \n3              152.7                    36.34%            84.77%   \n4              112.0                   -27.33%            41.11%   \n\n   Ending_Investors  Ending_Investors_A  Ending_Investors_B  Hushen_Total  \\\n0          18147.87            18086.26              239.71       80.08万亿   \n1          17986.92            17925.22              239.70       80.16万亿   \n2          17777.49            17715.72              239.71       79.72万亿   \n3          17615.31            17553.45              239.72       76.95万亿   \n4          17462.60            17400.30              239.78       73.58万亿   \n\n   Hushen_Avg  Shanghai_Securities_Composite_Index  Shanghai_index_Growth_Rate  \n0      44.28万                              3509.08                        0.75  \n1      44.72万                              3483.07                        0.29  \n2      45.00万                              3473.07                        2.40  \n3      43.84万                              3391.76                        5.19  \n4      42.29万                            
  3224.53                        0.20  \n\n[5 rows x 114 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Month</th>\n      <th>Manufacturing_Industry_Index</th>\n      <th>Manufacturing_YOY</th>\n      <th>Nonmanufacturing_Industry_Index</th>\n      <th>Nonmanufacturing_YOY</th>\n      <th>Current_Month_FDI</th>\n      <th>FDI_YOY</th>\n      <th>FDI_Comparative_Rate</th>\n      <th>FDI_Total</th>\n      <th>FDI_Total_YOY</th>\n      <th>...</th>\n      <th>New_Investor_Acct</th>\n      <th>New_Investor_Comparative</th>\n      <th>New_Investor_YOY</th>\n      <th>Ending_Investors</th>\n      <th>Ending_Investors_A</th>\n      <th>Ending_Investors_B</th>\n      <th>Hushen_Total</th>\n      <th>Hushen_Avg</th>\n      <th>Shanghai_Securities_Composite_Index</th>\n      <th>Shanghai_index_Growth_Rate</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>2021-02-01</td>\n      <td>50.6</td>\n      <td>0.4174</td>\n      <td>51.4</td>\n      <td>0.7365</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>...</td>\n      <td>160.9</td>\n      <td>-23.15%</td>\n      <td>79.74%</td>\n      <td>18147.87</td>\n      <td>18086.26</td>\n      <td>239.71</td>\n      <td>80.08万亿</td>\n      <td>44.28万</td>\n      <td>3509.08</td>\n      <td>0.75</td>\n    </tr>\n    <tr>\n      <td>1</td>\n      <td>2021-01-01</td>\n      <td>51.3</td>\n      <td>0.0260</td>\n      <td>52.4</td>\n      <td>-0.0314</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>...</td>\n      <td>209.4</td>\n      
<td>29.13%</td>\n      <td>161.60%</td>\n      <td>17986.92</td>\n      <td>17925.22</td>\n      <td>239.70</td>\n      <td>80.16万亿</td>\n      <td>44.72万</td>\n      <td>3483.07</td>\n      <td>0.29</td>\n    </tr>\n    <tr>\n      <td>2</td>\n      <td>2020-12-01</td>\n      <td>51.9</td>\n      <td>0.0339</td>\n      <td>55.7</td>\n      <td>0.0411</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>...</td>\n      <td>162.2</td>\n      <td>6.20%</td>\n      <td>100.40%</td>\n      <td>17777.49</td>\n      <td>17715.72</td>\n      <td>239.71</td>\n      <td>79.72万亿</td>\n      <td>45.00万</td>\n      <td>3473.07</td>\n      <td>2.40</td>\n    </tr>\n    <tr>\n      <td>3</td>\n      <td>2020-11-01</td>\n      <td>52.1</td>\n      <td>0.0378</td>\n      <td>56.4</td>\n      <td>0.0368</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>...</td>\n      <td>152.7</td>\n      <td>36.34%</td>\n      <td>84.77%</td>\n      <td>17615.31</td>\n      <td>17553.45</td>\n      <td>239.72</td>\n      <td>76.95万亿</td>\n      <td>43.84万</td>\n      <td>3391.76</td>\n      <td>5.19</td>\n    </tr>\n    <tr>\n      <td>4</td>\n      <td>2020-10-01</td>\n      <td>51.4</td>\n      <td>0.0426</td>\n      <td>56.2</td>\n      <td>0.0644</td>\n      <td>118.3</td>\n      <td>0.1832</td>\n      <td>-0.1704</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>...</td>\n      <td>112.0</td>\n      <td>-27.33%</td>\n      <td>41.11%</td>\n      <td>17462.60</td>\n      <td>17400.30</td>\n      <td>239.78</td>\n      <td>73.58万亿</td>\n      <td>42.29万</td>\n      <td>3224.53</td>\n      <td>0.20</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 114 
columns</p>\n</div>"},"execution_count":7}],"source":"data.head()"},{"cell_type":"markdown","id":"herbal-fraud","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"3951D0D1C4044BC38B48103557496658","trusted":true,"mdEditEnable":false},"source":"### 二、标签选择"},{"cell_type":"markdown","id":"excessive-detail","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"4A4DC8432409408C819EC066E68FBED9","trusted":true,"mdEditEnable":false},"source":"由于中国经济数据指标太多，我们选择对宏观经济数据指标进行预测，所以在此之前我们需要对经济数据先进行标签的选择。"},{"cell_type":"code","execution_count":8,"id":"rotary-kernel","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"C4E4F204AA344843A0EBF107FD196465","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"#产生时间序列\ndata[\"Month\"]=pd.to_datetime(data[\"Month\"])\ndata=data.set_index(\"Month\")"},{"cell_type":"code","execution_count":9,"id":"spanish-spring","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"A2E8C096E4DB4A66A9C57693D2BA82A5","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"            Manufacturing_Industry_Index  Manufacturing_YOY  \\\nMonth                                                         \n2021-02-01                          50.6             0.4174   \n2021-01-01                          51.3             0.0260   \n2020-12-01                          51.9             0.0339   \n2020-11-01                          52.1             0.0378   \n2020-10-01                          51.4             0.0426   \n\n            Nonmanufacturing_Industry_Index  Nonmanufacturing_YOY  \\\nMonth                                                               \n2021-02-01                             51.4                0.7365   \n2021-01-01                             52.4               -0.0314   \n2020-12-01                             55.7                0.0411   \n2020-11-01     
                        56.4                0.0368   \n2020-10-01                             56.2                0.0644   \n\n            Current_Month_FDI  FDI_YOY  FDI_Comparative_Rate  FDI_Total  \\\nMonth                                                                     \n2021-02-01                NaN      NaN                   NaN        NaN   \n2021-01-01                NaN      NaN                   NaN        NaN   \n2020-12-01                NaN      NaN                   NaN        NaN   \n2020-11-01                NaN      NaN                   NaN        NaN   \n2020-10-01              118.3   0.1832               -0.1704        NaN   \n\n            FDI_Total_YOY  Nation_Current_Month  ...  New_Investor_Acct  \\\nMonth                                            ...                      \n2021-02-01            NaN                  99.8  ...              160.9   \n2021-01-01            NaN                  99.7  ...              209.4   \n2020-12-01            NaN                 100.2  ...              162.2   \n2020-11-01            NaN                  99.5  ...              152.7   \n2020-10-01            NaN                 100.5  ...              
112.0   \n\n            New_Investor_Comparative  New_Investor_YOY  Ending_Investors  \\\nMonth                                                                      \n2021-02-01                   -23.15%            79.74%          18147.87   \n2021-01-01                    29.13%           161.60%          17986.92   \n2020-12-01                     6.20%           100.40%          17777.49   \n2020-11-01                    36.34%            84.77%          17615.31   \n2020-10-01                   -27.33%            41.11%          17462.60   \n\n            Ending_Investors_A  Ending_Investors_B  Hushen_Total  Hushen_Avg  \\\nMonth                                                                          \n2021-02-01            18086.26              239.71       80.08万亿      44.28万   \n2021-01-01            17925.22              239.70       80.16万亿      44.72万   \n2020-12-01            17715.72              239.71       79.72万亿      45.00万   \n2020-11-01            17553.45              239.72       76.95万亿      43.84万   \n2020-10-01            17400.30              239.78       73.58万亿      42.29万   \n\n            Shanghai_Securities_Composite_Index  Shanghai_index_Growth_Rate  \nMonth                                                                        \n2021-02-01                              3509.08                        0.75  \n2021-01-01                              3483.07                        0.29  \n2020-12-01                              3473.07                        2.40  \n2020-11-01                              3391.76                        5.19  \n2020-10-01                              3224.53                        0.20  \n\n[5 rows x 113 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" 
class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Manufacturing_Industry_Index</th>\n      <th>Manufacturing_YOY</th>\n      <th>Nonmanufacturing_Industry_Index</th>\n      <th>Nonmanufacturing_YOY</th>\n      <th>Current_Month_FDI</th>\n      <th>FDI_YOY</th>\n      <th>FDI_Comparative_Rate</th>\n      <th>FDI_Total</th>\n      <th>FDI_Total_YOY</th>\n      <th>Nation_Current_Month</th>\n      <th>...</th>\n      <th>New_Investor_Acct</th>\n      <th>New_Investor_Comparative</th>\n      <th>New_Investor_YOY</th>\n      <th>Ending_Investors</th>\n      <th>Ending_Investors_A</th>\n      <th>Ending_Investors_B</th>\n      <th>Hushen_Total</th>\n      <th>Hushen_Avg</th>\n      <th>Shanghai_Securities_Composite_Index</th>\n      <th>Shanghai_index_Growth_Rate</th>\n    </tr>\n    <tr>\n      <th>Month</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>2021-02-01</td>\n      <td>50.6</td>\n      <td>0.4174</td>\n      <td>51.4</td>\n      <td>0.7365</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>99.8</td>\n      <td>...</td>\n      <td>160.9</td>\n      <td>-23.15%</td>\n      <td>79.74%</td>\n      <td>18147.87</td>\n      <td>18086.26</td>\n      <td>239.71</td>\n      <td>80.08万亿</td>\n      <td>44.28万</td>\n      <td>3509.08</td>\n      <td>0.75</td>\n    </tr>\n    <tr>\n      <td>2021-01-01</td>\n      <td>51.3</td>\n      <td>0.0260</td>\n      <td>52.4</td>\n      <td>-0.0314</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      
<td>99.7</td>\n      <td>...</td>\n      <td>209.4</td>\n      <td>29.13%</td>\n      <td>161.60%</td>\n      <td>17986.92</td>\n      <td>17925.22</td>\n      <td>239.70</td>\n      <td>80.16万亿</td>\n      <td>44.72万</td>\n      <td>3483.07</td>\n      <td>0.29</td>\n    </tr>\n    <tr>\n      <td>2020-12-01</td>\n      <td>51.9</td>\n      <td>0.0339</td>\n      <td>55.7</td>\n      <td>0.0411</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>100.2</td>\n      <td>...</td>\n      <td>162.2</td>\n      <td>6.20%</td>\n      <td>100.40%</td>\n      <td>17777.49</td>\n      <td>17715.72</td>\n      <td>239.71</td>\n      <td>79.72万亿</td>\n      <td>45.00万</td>\n      <td>3473.07</td>\n      <td>2.40</td>\n    </tr>\n    <tr>\n      <td>2020-11-01</td>\n      <td>52.1</td>\n      <td>0.0378</td>\n      <td>56.4</td>\n      <td>0.0368</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>99.5</td>\n      <td>...</td>\n      <td>152.7</td>\n      <td>36.34%</td>\n      <td>84.77%</td>\n      <td>17615.31</td>\n      <td>17553.45</td>\n      <td>239.72</td>\n      <td>76.95万亿</td>\n      <td>43.84万</td>\n      <td>3391.76</td>\n      <td>5.19</td>\n    </tr>\n    <tr>\n      <td>2020-10-01</td>\n      <td>51.4</td>\n      <td>0.0426</td>\n      <td>56.2</td>\n      <td>0.0644</td>\n      <td>118.3</td>\n      <td>0.1832</td>\n      <td>-0.1704</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>100.5</td>\n      <td>...</td>\n      <td>112.0</td>\n      <td>-27.33%</td>\n      <td>41.11%</td>\n      <td>17462.60</td>\n      <td>17400.30</td>\n      <td>239.78</td>\n      <td>73.58万亿</td>\n      <td>42.29万</td>\n      <td>3224.53</td>\n      <td>0.20</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 113 
columns</p>\n</div>"},"execution_count":9}],"source":"data.head()"},{"cell_type":"markdown","id":"liked-following","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"59D50DEDAED5459890020C631E0955F0","trusted":true,"mdEditEnable":false},"source":"PPI通常作为观察通货膨胀水平的重要指标。PPI能够反映生产者获得原材料的价格波动情况，推算预期CPI，从而估计通胀风险"},{"cell_type":"code","execution_count":10,"id":"actual-craps","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"F6F03740899544728485D1A4C3056E54","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"stream","text":"<class 'pandas.core.frame.DataFrame'>\nDatetimeIndex: 182 entries, 2021-02-01 to 2006-01-01\nColumns: 113 entries, Manufacturing_Industry_Index to Shanghai_index_Growth_Rate\ndtypes: float64(88), object(25)\nmemory usage: 162.1+ KB\n","name":"stdout"}],"source":"data.info()"},{"cell_type":"code","execution_count":11,"id":"formed-sydney","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"A67354887AF24EF996516CF84CFAF152","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# 我们认为在中国经济指标中，一些同比和环比这是我们数据增强的手段，帮助我们了解数据变化的。 \n# 所以我们删除数据指标中的环比与同比，\nnew_col=[]\nfor i in  data.columns.tolist():\n    if  \"YOY\" in i:\n        continue\n    elif \"Comparative\" in i:\n        continue\n    new_col.append(i)"},{"cell_type":"code","execution_count":12,"id":"british-advice","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"D8257EA789CF4A9DBD0D3194CD69EF24","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"data=data[new_col]"},{"cell_type":"code","execution_count":13,"id":"established-federation","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"E48CD7005D694F609D97A71F16D876D0","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"stream","text":"<class 'pandas.core.frame.DataFrame'>\nDatetimeIndex: 182 entries, 2021-02-01 to 2006-01-01\nData 
columns (total 55 columns):\nManufacturing_Industry_Index           158 non-null float64\nNonmanufacturing_Industry_Index        158 non-null float64\nCurrent_Month_FDI                      148 non-null float64\nFDI_Total                              143 non-null float64\nNation_Current_Month                   158 non-null float64\nNation_Total                           158 non-null float64\nCity_Current_Month                     158 non-null float64\nCity_Total                             158 non-null float64\nCountry_Current_Month                  158 non-null float64\nCountry_Total                          158 non-null float64\nRMB_Fore_Current_Month_Value           144 non-null float64\nRMB_Total                              144 non-null float64\nFis_Current_Month_Value                155 non-null float64\nFis_Current_Month_Comparattive         155 non-null float64\nFis_Total                              155 non-null float64\nUF_Current_month_Value                 144 non-null float64\nTotal_from_Beg_of_the_Year             144 non-null float64\nBusiness_Index                         33 non-null float64\nLand_DevelopArea_Index                 33 non-null float64\nPrice_Index                            33 non-null float64\nCumulative_Growth                      144 non-null float64\nCurrent_Export                         156 non-null object\nCurrent_Import                         156 non-null object\nTotal_Export                           156 non-null float64\nTotal_Import                           156 non-null float64\nM2                                     158 non-null float64\nM1                                     158 non-null float64\nM0                                     158 non-null float64\nCPGI                                   181 non-null float64\nAgricultural_Products_Index            181 non-null float64\nMineral_Products_Index                 181 non-null float64\nKerosene_Power_Index                   181 non-null float64\nCurrent_Month_x         
               148 non-null object\nCurrent_Month_Comparattive_x           148 non-null object\nTotal_x                                148 non-null float64\nCCI                                    169 non-null float64\nCSI                                    169 non-null float64\nCEI                                    169 non-null float64\nFE_Reserve                             158 non-null float64\nGold_Reserve                           158 non-null float64\nCurrent_Month_y                        144 non-null float64\nCurrent_Month_Comparattive_y           144 non-null object\nTotal_y                                144 non-null float64\nNC_Current_Month                       158 non-null float64\nNC_Current_Month_Comparattive          158 non-null object\nNC_Total                               158 non-null float64\nData_Date                              71 non-null object\nNew_Investor_Acct                      71 non-null float64\nEnding_Investors                       71 non-null float64\nEnding_Investors_A                     71 non-null float64\nEnding_Investors_B                     71 non-null float64\nHushen_Total                           71 non-null object\nHushen_Avg                             71 non-null object\nShanghai_Securities_Composite_Index    71 non-null float64\nShanghai_index_Growth_Rate             71 non-null float64\ndtypes: float64(46), object(9)\nmemory usage: 79.6+ KB\n","name":"stdout"}],"source":"data.info()"},{"cell_type":"markdown","id":"valid-region","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"5EAD293FBC7247F28BB3E27C81E2105D","trusted":true,"mdEditEnable":false},"source":"简单删除数据之后，就只剩下55列数据，对于中国宏观经济数据预测我们还需要进一步选取几个标签，作为我们的指数预测，当然这一步我们查询资料，通过专家确定中国宏观经济数据指标。"},{"cell_type":"markdown","id":"retained-annex","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"1E6B84370EE84FF4A9777AA4D36E75B9","trusted":true,"mdEditEnable":false},"source":"### 
三、数据预处理\n"},{"cell_type":"code","execution_count":14,"id":"measured-sampling","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"155227A8234F4FB2BD472D6BAB6C2662","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"stream","text":"<class 'pandas.core.frame.DataFrame'>\nDatetimeIndex: 182 entries, 2021-02-01 to 2006-01-01\nData columns (total 55 columns):\nManufacturing_Industry_Index           158 non-null float64\nNonmanufacturing_Industry_Index        158 non-null float64\nCurrent_Month_FDI                      148 non-null float64\nFDI_Total                              143 non-null float64\nNation_Current_Month                   158 non-null float64\nNation_Total                           158 non-null float64\nCity_Current_Month                     158 non-null float64\nCity_Total                             158 non-null float64\nCountry_Current_Month                  158 non-null float64\nCountry_Total                          158 non-null float64\nRMB_Fore_Current_Month_Value           144 non-null float64\nRMB_Total                              144 non-null float64\nFis_Current_Month_Value                155 non-null float64\nFis_Current_Month_Comparattive         155 non-null float64\nFis_Total                              155 non-null float64\nUF_Current_month_Value                 144 non-null float64\nTotal_from_Beg_of_the_Year             144 non-null float64\nBusiness_Index                         33 non-null float64\nLand_DevelopArea_Index                 33 non-null float64\nPrice_Index                            33 non-null float64\nCumulative_Growth                      144 non-null float64\nCurrent_Export                         156 non-null object\nCurrent_Import                         156 non-null object\nTotal_Export                           156 non-null float64\nTotal_Import                           156 non-null float64\nM2                                     158 non-null float64\nM1        
                             158 non-null float64\nM0                                     158 non-null float64\nCPGI                                   181 non-null float64\nAgricultural_Products_Index            181 non-null float64\nMineral_Products_Index                 181 non-null float64\nKerosene_Power_Index                   181 non-null float64\nCurrent_Month_x                        148 non-null object\nCurrent_Month_Comparattive_x           148 non-null object\nTotal_x                                148 non-null float64\nCCI                                    169 non-null float64\nCSI                                    169 non-null float64\nCEI                                    169 non-null float64\nFE_Reserve                             158 non-null float64\nGold_Reserve                           158 non-null float64\nCurrent_Month_y                        144 non-null float64\nCurrent_Month_Comparattive_y           144 non-null object\nTotal_y                                144 non-null float64\nNC_Current_Month                       158 non-null float64\nNC_Current_Month_Comparattive          158 non-null object\nNC_Total                               158 non-null float64\nData_Date                              71 non-null object\nNew_Investor_Acct                      71 non-null float64\nEnding_Investors                       71 non-null float64\nEnding_Investors_A                     71 non-null float64\nEnding_Investors_B                     71 non-null float64\nHushen_Total                           71 non-null object\nHushen_Avg                             71 non-null object\nShanghai_Securities_Composite_Index    71 non-null float64\nShanghai_index_Growth_Rate             71 non-null float64\ndtypes: float64(46), object(9)\nmemory usage: 79.6+ 
KB\n","name":"stdout"}],"source":"data.info()"},{"cell_type":"markdown","id":"universal-lying","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"54306EAC389447558BB5682EEA51CF8F","trusted":true,"mdEditEnable":false},"source":"\"Current_Export\",\"Current_Import\",\"M2\",\"M1\",\"M0\",\"FE_Reserve\",\"Gold_Reserve\",  \"Fis_Current_Month_Value\",\"Nation_Current_Month\",\"City_Current_Month\",\"Country_Current_Month\"  \n我们选取出口与进口量，中国货币供应量的M0，M1，M2指标，国家外汇储备，黄金储备(万盎司)  \n中国财政收入当月数据， 消费物价指数CPI，中国居民消费价格城市当月，中国居民消费价格农村当月"},{"cell_type":"code","execution_count":15,"id":"preceding-oregon","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"561A3BB2FECB4890B8B42A66586BC8AE","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# 选取特征\ndataset=data[[\"Current_Export\",\"Current_Import\",\"M2\",\"M1\",\"M0\",\"FE_Reserve\",\"Gold_Reserve\",\"Fis_Current_Month_Value\",\"Nation_Current_Month\",\"City_Current_Month\",\"Country_Current_Month\"]]"},{"cell_type":"code","execution_count":16,"id":"advised-acrylic","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"CC0C951E3B0D4E33B5921FEEFD8AD651","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"           Current_Export Current_Import         M2        M1       M0  \\\nMonth                                                                    \n2021-02-01              -              -  2236000.0  593500.0  91900.0   \n2021-01-01            NaN            NaN  2213000.0  625600.0  89600.0   \n2020-12-01           2819           2038  2186800.0  625600.0  84300.0   \n2020-11-01           2681           1926  2172000.0  618600.0  81600.0   \n2020-10-01           2372           1787  2149700.0  609200.0  81000.0   \n...                   ...            ...        ...       ...      ...   
\n2006-05-01            NaN            NaN        NaN       NaN      NaN   \n2006-04-01            NaN            NaN        NaN       NaN      NaN   \n2006-03-01            NaN            NaN        NaN       NaN      NaN   \n2006-02-01            NaN            NaN        NaN       NaN      NaN   \n2006-01-01            NaN            NaN        NaN       NaN      NaN   \n\n            FE_Reserve  Gold_Reserve  Fis_Current_Month_Value  \\\nMonth                                                           \n2021-02-01    32049.94        6264.0                      0.0   \n2021-01-01    32106.71        6264.0                      NaN   \n2020-12-01    32165.22        6264.0                  13406.0   \n2020-11-01    31784.90        6264.0                  10956.0   \n2020-10-01    31279.82        6264.0                  17531.0   \n...                ...           ...                      ...   \n2006-05-01         NaN           NaN                      NaN   \n2006-04-01         NaN           NaN                      NaN   \n2006-03-01         NaN           NaN                      NaN   \n2006-02-01         NaN           NaN                      NaN   \n2006-01-01         NaN           NaN                      NaN   \n\n            Nation_Current_Month  City_Current_Month  Country_Current_Month  \nMonth                                                                        \n2021-02-01                  99.8                99.8                   99.9  \n2021-01-01                  99.7                99.6                   99.9  \n2020-12-01                 100.2               100.2                  100.2  \n2020-11-01                  99.5                99.6                   99.2  \n2020-10-01                 100.5               100.5                  100.4  \n...                          ...                 ...                    ...  
\n2006-05-01                   NaN                 NaN                    NaN  \n2006-04-01                   NaN                 NaN                    NaN  \n2006-03-01                   NaN                 NaN                    NaN  \n2006-02-01                   NaN                 NaN                    NaN  \n2006-01-01                   NaN                 NaN                    NaN  \n\n[182 rows x 11 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Current_Export</th>\n      <th>Current_Import</th>\n      <th>M2</th>\n      <th>M1</th>\n      <th>M0</th>\n      <th>FE_Reserve</th>\n      <th>Gold_Reserve</th>\n      <th>Fis_Current_Month_Value</th>\n      <th>Nation_Current_Month</th>\n      <th>City_Current_Month</th>\n      <th>Country_Current_Month</th>\n    </tr>\n    <tr>\n      <th>Month</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>2021-02-01</td>\n      <td>-</td>\n      <td>-</td>\n      <td>2236000.0</td>\n      <td>593500.0</td>\n      <td>91900.0</td>\n      <td>32049.94</td>\n      <td>6264.0</td>\n      <td>0.0</td>\n      <td>99.8</td>\n      <td>99.8</td>\n      <td>99.9</td>\n    </tr>\n    <tr>\n      <td>2021-01-01</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2213000.0</td>\n      <td>625600.0</td>\n      <td>89600.0</td>\n      <td>32106.71</td>\n      <td>6264.0</td>\n      <td>NaN</td>\n      <td>99.7</td>\n      <td>99.6</td>\n      <td>99.9</td>\n    </tr>\n    <tr>\n      
<td>2020-12-01</td>\n      <td>2819</td>\n      <td>2038</td>\n      <td>2186800.0</td>\n      <td>625600.0</td>\n      <td>84300.0</td>\n      <td>32165.22</td>\n      <td>6264.0</td>\n      <td>13406.0</td>\n      <td>100.2</td>\n      <td>100.2</td>\n      <td>100.2</td>\n    </tr>\n    <tr>\n      <td>2020-11-01</td>\n      <td>2681</td>\n      <td>1926</td>\n      <td>2172000.0</td>\n      <td>618600.0</td>\n      <td>81600.0</td>\n      <td>31784.90</td>\n      <td>6264.0</td>\n      <td>10956.0</td>\n      <td>99.5</td>\n      <td>99.6</td>\n      <td>99.2</td>\n    </tr>\n    <tr>\n      <td>2020-10-01</td>\n      <td>2372</td>\n      <td>1787</td>\n      <td>2149700.0</td>\n      <td>609200.0</td>\n      <td>81000.0</td>\n      <td>31279.82</td>\n      <td>6264.0</td>\n      <td>17531.0</td>\n      <td>100.5</td>\n      <td>100.5</td>\n      <td>100.4</td>\n    </tr>\n    <tr>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <td>2006-05-01</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2006-04-01</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2006-03-01</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2006-02-01</td>\n      
<td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2006-01-01</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n<p>182 rows × 11 columns</p>\n</div>"},"execution_count":16}],"source":"dataset"},{"cell_type":"code","execution_count":17,"id":"provincial-description","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"FAC9D904864A4C768D35C6DD3CC5E24A","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"Month\n2007-08-01    11\n2007-07-01    11\n2007-06-01    11\n2007-05-01    11\n2007-04-01    11\n2007-03-01    11\n2007-02-01    11\n2007-01-01    11\n2006-12-01    11\n2006-11-01    11\n2006-10-01    11\n2006-09-01    11\n2006-08-01    11\n2006-07-01    11\n2006-06-01    11\n2006-05-01    11\n2006-04-01    11\n2006-03-01    11\n2006-02-01    11\n2006-01-01    11\ndtype: int64"},"execution_count":17}],"source":"dataset.isnull().sum(1).tail(20)"},{"cell_type":"code","execution_count":18,"id":"governing-silicon","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"1E2A0D408660471192376A32648E7059","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# 
删除全为空的行\ndataset.dropna(how=\"all\",inplace=True)"},{"cell_type":"code","execution_count":19,"id":"hundred-jonathan","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"6607BBE8AFAD4B8A8AC04B949DD31531","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"dataset.replace({\"-\":\"NaN\"},inplace=True)"},{"cell_type":"code","execution_count":20,"id":"arbitrary-prize","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"1CD64CA6201B42E4B3B33BB4CEDFFC02","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"dataset[[\"Current_Export\",\"Current_Import\"]]=dataset[[\"Current_Export\",\"Current_Import\"]].astype(\"float\")"},{"cell_type":"code","execution_count":21,"id":"smart-japanese","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"F31EC1A4E68847A78A5F2C59A611721F","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"stream","text":"<class 'pandas.core.frame.DataFrame'>\nDatetimeIndex: 158 entries, 2021-02-01 to 2008-01-01\nData columns (total 11 columns):\nCurrent_Export             154 non-null float64\nCurrent_Import             154 non-null float64\nM2                         158 non-null float64\nM1                         158 non-null float64\nM0                         158 non-null float64\nFE_Reserve                 158 non-null float64\nGold_Reserve               158 non-null float64\nFis_Current_Month_Value    155 non-null float64\nNation_Current_Month       158 non-null float64\nCity_Current_Month         158 non-null float64\nCountry_Current_Month      158 non-null float64\ndtypes: float64(11)\nmemory usage: 14.8 
KB\n","name":"stdout"}],"source":"dataset.info()"},{"cell_type":"code","execution_count":22,"id":"instant-watson","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"510D8CDB1AFB4DD39960A51371E37AE2","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"            Current_Export  Current_Import         M2        M1       M0  \\\nMonth                                                                      \n2021-02-01             NaN             NaN  2236000.0  593500.0  91900.0   \n2021-01-01             NaN             NaN  2213000.0  625600.0  89600.0   \n2020-12-01          2819.0          2038.0  2186800.0  625600.0  84300.0   \n2020-11-01          2681.0          1926.0  2172000.0  618600.0  81600.0   \n2020-10-01          2372.0          1787.0  2149700.0  609200.0  81000.0   \n\n            FE_Reserve  Gold_Reserve  Fis_Current_Month_Value  \\\nMonth                                                           \n2021-02-01    32049.94        6264.0                      0.0   \n2021-01-01    32106.71        6264.0                      NaN   \n2020-12-01    32165.22        6264.0                  13406.0   \n2020-11-01    31784.90        6264.0                  10956.0   \n2020-10-01    31279.82        6264.0                  17531.0   \n\n            Nation_Current_Month  City_Current_Month  Country_Current_Month  \nMonth                                                                        \n2021-02-01                  99.8                99.8                   99.9  \n2021-01-01                  99.7                99.6                   99.9  \n2020-12-01                 100.2               100.2                  100.2  \n2020-11-01                  99.5                99.6                   99.2  \n2020-10-01                 100.5               100.5                  100.4  ","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n       
 vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Current_Export</th>\n      <th>Current_Import</th>\n      <th>M2</th>\n      <th>M1</th>\n      <th>M0</th>\n      <th>FE_Reserve</th>\n      <th>Gold_Reserve</th>\n      <th>Fis_Current_Month_Value</th>\n      <th>Nation_Current_Month</th>\n      <th>City_Current_Month</th>\n      <th>Country_Current_Month</th>\n    </tr>\n    <tr>\n      <th>Month</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>2021-02-01</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2236000.0</td>\n      <td>593500.0</td>\n      <td>91900.0</td>\n      <td>32049.94</td>\n      <td>6264.0</td>\n      <td>0.0</td>\n      <td>99.8</td>\n      <td>99.8</td>\n      <td>99.9</td>\n    </tr>\n    <tr>\n      <td>2021-01-01</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2213000.0</td>\n      <td>625600.0</td>\n      <td>89600.0</td>\n      <td>32106.71</td>\n      <td>6264.0</td>\n      <td>NaN</td>\n      <td>99.7</td>\n      <td>99.6</td>\n      <td>99.9</td>\n    </tr>\n    <tr>\n      <td>2020-12-01</td>\n      <td>2819.0</td>\n      <td>2038.0</td>\n      <td>2186800.0</td>\n      <td>625600.0</td>\n      <td>84300.0</td>\n      <td>32165.22</td>\n      <td>6264.0</td>\n      <td>13406.0</td>\n      <td>100.2</td>\n      <td>100.2</td>\n      <td>100.2</td>\n    </tr>\n    <tr>\n      <td>2020-11-01</td>\n      <td>2681.0</td>\n      <td>1926.0</td>\n      <td>2172000.0</td>\n      <td>618600.0</td>\n      <td>81600.0</td>\n      <td>31784.90</td>\n      <td>6264.0</td>\n      <td>10956.0</td>\n 
     <td>99.5</td>\n      <td>99.6</td>\n      <td>99.2</td>\n    </tr>\n    <tr>\n      <td>2020-10-01</td>\n      <td>2372.0</td>\n      <td>1787.0</td>\n      <td>2149700.0</td>\n      <td>609200.0</td>\n      <td>81000.0</td>\n      <td>31279.82</td>\n      <td>6264.0</td>\n      <td>17531.0</td>\n      <td>100.5</td>\n      <td>100.5</td>\n      <td>100.4</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"execution_count":22}],"source":"dataset.head()"},{"cell_type":"code","execution_count":23,"id":"collaborative-spider","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"9A528EF55D35401D9206CAE32C18CB3B","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"3"},"execution_count":23}],"source":"dataset.isnull().any().sum() #空值数据收集"},{"cell_type":"code","execution_count":24,"id":"governmental-valentine","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"2F1582F32CF440F897EAFF5650579126","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"(158, 11)"},"execution_count":24}],"source":"dataset.shape"},{"cell_type":"code","execution_count":25,"id":"destroyed-nancy","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"CF09279FBD034C3890AD2CC8D6B5C136","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"       Current_Export  Current_Import            M2             M1  \\\ncount      154.000000      154.000000  1.580000e+02     158.000000   \nmean      1735.477922     1433.764286  1.232288e+06  370179.318038   \nstd        418.470194      324.473989  5.306693e+05  141523.797847   \nmin        648.900000      513.400000  4.178462e+05  150177.880000   \n25%       1407.000000     1248.750000  7.594505e+05  266873.472500   \n50%       1811.000000     1478.500000  
1.198368e+06  331685.275000   \n75%       2017.750000     1680.500000  1.675284e+06  522183.535000   \nmax       2819.000000     2038.000000  2.236000e+06  625600.000000   \n\n                 M0    FE_Reserve  Gold_Reserve  Fis_Current_Month_Value  \\\ncount    158.000000    158.000000    158.000000               155.000000   \nmean   58220.200886  30513.532405   4382.056962             10773.855355   \nstd    15921.655789   5659.057184   1478.607355              4356.931221   \nmin    30169.300000  15898.100000   1929.000000                 0.000000   \n25%    45259.582500  30061.150000   3389.000000              7707.265000   \n50%    58927.850000  31227.535000   3389.000000             10774.000000   \n75%    70023.812500  32986.025000   5924.000000             13636.430000   \nmax    93249.160000  39932.130000   6264.000000             23621.100000   \n\n       Nation_Current_Month  City_Current_Month  Country_Current_Month  \ncount            158.000000          158.000000             158.000000  \nmean             102.560127          102.501646             102.696392  \nstd                1.933328            1.870288               2.130184  \nmin               98.190000           98.100000              98.420000  \n25%              101.600000          101.700000             101.500000  \n50%              102.300000          102.210000             102.195000  \n75%              103.190000          103.127500             103.330000  \nmax              108.740000          108.500000             109.270000  ","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Current_Export</th>\n      <th>Current_Import</th>\n      <th>M2</th>\n      
<th>M1</th>\n      <th>M0</th>\n      <th>FE_Reserve</th>\n      <th>Gold_Reserve</th>\n      <th>Fis_Current_Month_Value</th>\n      <th>Nation_Current_Month</th>\n      <th>City_Current_Month</th>\n      <th>Country_Current_Month</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>count</td>\n      <td>154.000000</td>\n      <td>154.000000</td>\n      <td>1.580000e+02</td>\n      <td>158.000000</td>\n      <td>158.000000</td>\n      <td>158.000000</td>\n      <td>158.000000</td>\n      <td>155.000000</td>\n      <td>158.000000</td>\n      <td>158.000000</td>\n      <td>158.000000</td>\n    </tr>\n    <tr>\n      <td>mean</td>\n      <td>1735.477922</td>\n      <td>1433.764286</td>\n      <td>1.232288e+06</td>\n      <td>370179.318038</td>\n      <td>58220.200886</td>\n      <td>30513.532405</td>\n      <td>4382.056962</td>\n      <td>10773.855355</td>\n      <td>102.560127</td>\n      <td>102.501646</td>\n      <td>102.696392</td>\n    </tr>\n    <tr>\n      <td>std</td>\n      <td>418.470194</td>\n      <td>324.473989</td>\n      <td>5.306693e+05</td>\n      <td>141523.797847</td>\n      <td>15921.655789</td>\n      <td>5659.057184</td>\n      <td>1478.607355</td>\n      <td>4356.931221</td>\n      <td>1.933328</td>\n      <td>1.870288</td>\n      <td>2.130184</td>\n    </tr>\n    <tr>\n      <td>min</td>\n      <td>648.900000</td>\n      <td>513.400000</td>\n      <td>4.178462e+05</td>\n      <td>150177.880000</td>\n      <td>30169.300000</td>\n      <td>15898.100000</td>\n      <td>1929.000000</td>\n      <td>0.000000</td>\n      <td>98.190000</td>\n      <td>98.100000</td>\n      <td>98.420000</td>\n    </tr>\n    <tr>\n      <td>25%</td>\n      <td>1407.000000</td>\n      <td>1248.750000</td>\n      <td>7.594505e+05</td>\n      <td>266873.472500</td>\n      <td>45259.582500</td>\n      <td>30061.150000</td>\n      <td>3389.000000</td>\n      <td>7707.265000</td>\n      <td>101.600000</td>\n      <td>101.700000</td>\n      <td>101.500000</td>\n    
</tr>\n    <tr>\n      <td>50%</td>\n      <td>1811.000000</td>\n      <td>1478.500000</td>\n      <td>1.198368e+06</td>\n      <td>331685.275000</td>\n      <td>58927.850000</td>\n      <td>31227.535000</td>\n      <td>3389.000000</td>\n      <td>10774.000000</td>\n      <td>102.300000</td>\n      <td>102.210000</td>\n      <td>102.195000</td>\n    </tr>\n    <tr>\n      <td>75%</td>\n      <td>2017.750000</td>\n      <td>1680.500000</td>\n      <td>1.675284e+06</td>\n      <td>522183.535000</td>\n      <td>70023.812500</td>\n      <td>32986.025000</td>\n      <td>5924.000000</td>\n      <td>13636.430000</td>\n      <td>103.190000</td>\n      <td>103.127500</td>\n      <td>103.330000</td>\n    </tr>\n    <tr>\n      <td>max</td>\n      <td>2819.000000</td>\n      <td>2038.000000</td>\n      <td>2.236000e+06</td>\n      <td>625600.000000</td>\n      <td>93249.160000</td>\n      <td>39932.130000</td>\n      <td>6264.000000</td>\n      <td>23621.100000</td>\n      <td>108.740000</td>\n      <td>108.500000</td>\n      <td>109.270000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"execution_count":25}],"source":"dataset.describe()"},{"cell_type":"code","execution_count":26,"id":"awful-eight","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"B6425A3EC03747499B6DA0527F3D4658","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"<matplotlib.axes._subplots.AxesSubplot at 0x7f023b34e6a0>"},"execution_count":26},{"output_type":"display_data","metadata":{"needs_background":"light"},"data":{"text/plain":"<Figure size 1296x360 with 1 Axes>","text/html":"<img src=\"https://cdn.kesci.com/upload/rt/B6425A3EC03747499B6DA0527F3D4658/qtwulguftt.png\">"}}],"source":"plt.figure(figsize=(18, 5))\n# ax = plt.subplot(1,2,1)\nplt.title('The boxplot of total')\nsns.boxplot(data=dataset)\n# ax = 
plt.subplot(1,2,2)#箱式图帮助我们了解数据分布"},{"cell_type":"code","execution_count":27,"id":"subsequent-ceiling","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"F38A367ED6E349338D474F6D122CAF43","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"            Current_Export  Current_Import         M2        M1       M0  \\\nMonth                                                                      \n2021-02-01             NaN             NaN  2236000.0  593500.0  91900.0   \n2021-01-01             NaN             NaN  2213000.0  625600.0  89600.0   \n2020-12-01          2819.0          2038.0  2186800.0  625600.0  84300.0   \n2020-11-01          2681.0          1926.0  2172000.0  618600.0  81600.0   \n2020-10-01          2372.0          1787.0  2149700.0  609200.0  81000.0   \n\n            FE_Reserve  Gold_Reserve  Fis_Current_Month_Value  \\\nMonth                                                           \n2021-02-01    32049.94        6264.0                      0.0   \n2021-01-01    32106.71        6264.0                      NaN   \n2020-12-01    32165.22        6264.0                  13406.0   \n2020-11-01    31784.90        6264.0                  10956.0   \n2020-10-01    31279.82        6264.0                  17531.0   \n\n            Nation_Current_Month  City_Current_Month  Country_Current_Month  \nMonth                                                                        \n2021-02-01                  99.8                99.8                   99.9  \n2021-01-01                  99.7                99.6                   99.9  \n2020-12-01                 100.2               100.2                  100.2  \n2020-11-01                  99.5                99.6                   99.2  \n2020-10-01                 100.5               100.5                  100.4  ","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        
vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Current_Export</th>\n      <th>Current_Import</th>\n      <th>M2</th>\n      <th>M1</th>\n      <th>M0</th>\n      <th>FE_Reserve</th>\n      <th>Gold_Reserve</th>\n      <th>Fis_Current_Month_Value</th>\n      <th>Nation_Current_Month</th>\n      <th>City_Current_Month</th>\n      <th>Country_Current_Month</th>\n    </tr>\n    <tr>\n      <th>Month</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>2021-02-01</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2236000.0</td>\n      <td>593500.0</td>\n      <td>91900.0</td>\n      <td>32049.94</td>\n      <td>6264.0</td>\n      <td>0.0</td>\n      <td>99.8</td>\n      <td>99.8</td>\n      <td>99.9</td>\n    </tr>\n    <tr>\n      <td>2021-01-01</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2213000.0</td>\n      <td>625600.0</td>\n      <td>89600.0</td>\n      <td>32106.71</td>\n      <td>6264.0</td>\n      <td>NaN</td>\n      <td>99.7</td>\n      <td>99.6</td>\n      <td>99.9</td>\n    </tr>\n    <tr>\n      <td>2020-12-01</td>\n      <td>2819.0</td>\n      <td>2038.0</td>\n      <td>2186800.0</td>\n      <td>625600.0</td>\n      <td>84300.0</td>\n      <td>32165.22</td>\n      <td>6264.0</td>\n      <td>13406.0</td>\n      <td>100.2</td>\n      <td>100.2</td>\n      <td>100.2</td>\n    </tr>\n    <tr>\n      <td>2020-11-01</td>\n      <td>2681.0</td>\n      <td>1926.0</td>\n      <td>2172000.0</td>\n      <td>618600.0</td>\n      <td>81600.0</td>\n      <td>31784.90</td>\n      <td>6264.0</td>\n      <td>10956.0</td>\n  
    <td>99.5</td>\n      <td>99.6</td>\n      <td>99.2</td>\n    </tr>\n    <tr>\n      <td>2020-10-01</td>\n      <td>2372.0</td>\n      <td>1787.0</td>\n      <td>2149700.0</td>\n      <td>609200.0</td>\n      <td>81000.0</td>\n      <td>31279.82</td>\n      <td>6264.0</td>\n      <td>17531.0</td>\n      <td>100.5</td>\n      <td>100.5</td>\n      <td>100.4</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"execution_count":27}],"source":"dataset.head()"},{"cell_type":"markdown","id":"unable-canon","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"5D7A0F79A02740B890DFC78898FCFC8E","trusted":true,"mdEditEnable":false},"source":"### 四、单标签特征工程"},{"cell_type":"code","execution_count":33,"id":"medieval-sheep","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"E4263FD333694F31BD935FB75C95B7CA","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# 我们使用均值对数据填充\ndataset[\"Current_Export\"] = dataset[\"Current_Export\"].fillna(dataset[\"Current_Export\"].mean())\ndataset[\"Current_Import\"] = dataset[\"Current_Import\"].fillna(dataset[\"Current_Import\"].mean())\ndataset[\"Fis_Current_Month_Value\"] = dataset[\"Fis_Current_Month_Value\"].fillna(dataset[\"Fis_Current_Month_Value\"].mean())"},{"cell_type":"code","execution_count":34,"id":"silent-astronomy","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"1CA5A8A809A04323A62328AC790389B7","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"<matplotlib.axes._subplots.AxesSubplot at 0x7f023aed10b8>"},"execution_count":34},{"output_type":"display_data","metadata":{"needs_background":"light"},"data":{"text/plain":"<Figure size 576x576 with 2 Axes>","text/html":"<img src=\"https://cdn.kesci.com/upload/rt/1CA5A8A809A04323A62328AC790389B7/qtwurm3qy0.png\">"}}],"source":"plt.figure(figsize=(8,8))\nsns.heatmap(dataset.corr(),vmax=0.8,square=True,annot=True) 
#热力图分析相关性"},{"cell_type":"code","execution_count":35,"id":"lightweight-proposal","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"D8433A9DC8564C33BBBDC7F1C0E5053C","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"<matplotlib.axes._subplots.AxesSubplot at 0x7f023b067278>"},"execution_count":35},{"output_type":"display_data","metadata":{"needs_background":"light"},"data":{"text/plain":"<Figure size 1152x864 with 1 Axes>","text/html":"<img src=\"https://cdn.kesci.com/upload/rt/D8433A9DC8564C33BBBDC7F1C0E5053C/qtwursb7e5.png\">"}}],"source":"plt.figure(figsize=(16,12))\nsns.lineplot(data=dataset[[\"Current_Export\",\"Gold_Reserve\",\"Nation_Current_Month\",\"City_Current_Month\",\"Country_Current_Month\"]])"},{"cell_type":"code","execution_count":36,"id":"compliant-stage","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"67EB50EA73314994974018BD2B269D55","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"            Current_Export  Current_Import          M2         M1        M0  \\\nMonth                                                                         \n2021-02-01     1735.477922     1433.764286  2236000.00  593500.00  91900.00   \n2021-01-01     1735.477922     1433.764286  2213000.00  625600.00  89600.00   \n2020-12-01     2819.000000     2038.000000  2186800.00  625600.00  84300.00   \n2020-11-01     2681.000000     1926.000000  2172000.00  618600.00  81600.00   \n2020-10-01     2372.000000     1787.000000  2149700.00  609200.00  81000.00   \n2008-05-01     1205.000000     1003.000000   436221.60  153344.75  30169.30   \n2008-04-01     1187.000000     1020.000000   429313.72  151694.91  30789.61   \n2008-03-01     1090.000000      955.600000   423054.53  150867.47  30433.07   \n2008-02-01      873.700000      788.100000   421037.84  150177.88  
32454.47   \n2008-01-01     1096.000000      901.700000   417846.17  154872.59  36673.15   \n\n            FE_Reserve  Gold_Reserve  Fis_Current_Month_Value  \\\nMonth                                                           \n2021-02-01    32049.94        6264.0                 0.000000   \n2021-01-01    32106.71        6264.0             10773.855355   \n2020-12-01    32165.22        6264.0             13406.000000   \n2020-11-01    31784.90        6264.0             10956.000000   \n2020-10-01    31279.82        6264.0             17531.000000   \n2008-05-01    17969.61        1929.0              6268.180000   \n2008-04-01    17566.55        1929.0              6824.900000   \n2008-03-01    16821.77        1929.0              4416.000000   \n2008-02-01    16471.34        1929.0              4158.800000   \n2008-01-01    15898.10        1929.0              7396.640000   \n\n            Nation_Current_Month  City_Current_Month  Country_Current_Month  \nMonth                                                                        \n2021-02-01                 99.80               99.80                  99.90  \n2021-01-01                 99.70               99.60                  99.90  \n2020-12-01                100.20              100.20                 100.20  \n2020-11-01                 99.50               99.60                  99.20  \n2020-10-01                100.50              100.50                 100.40  \n2008-05-01                107.72              107.33                 108.55  \n2008-04-01                108.48              108.11                 109.27  \n2008-03-01                108.31              107.97                 109.03  \n2008-02-01                108.74              108.50                 109.23  \n2008-01-01                107.08              106.78                 107.72  ","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        
vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Current_Export</th>\n      <th>Current_Import</th>\n      <th>M2</th>\n      <th>M1</th>\n      <th>M0</th>\n      <th>FE_Reserve</th>\n      <th>Gold_Reserve</th>\n      <th>Fis_Current_Month_Value</th>\n      <th>Nation_Current_Month</th>\n      <th>City_Current_Month</th>\n      <th>Country_Current_Month</th>\n    </tr>\n    <tr>\n      <th>Month</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>2021-02-01</td>\n      <td>1735.477922</td>\n      <td>1433.764286</td>\n      <td>2236000.00</td>\n      <td>593500.00</td>\n      <td>91900.00</td>\n      <td>32049.94</td>\n      <td>6264.0</td>\n      <td>0.000000</td>\n      <td>99.80</td>\n      <td>99.80</td>\n      <td>99.90</td>\n    </tr>\n    <tr>\n      <td>2021-01-01</td>\n      <td>1735.477922</td>\n      <td>1433.764286</td>\n      <td>2213000.00</td>\n      <td>625600.00</td>\n      <td>89600.00</td>\n      <td>32106.71</td>\n      <td>6264.0</td>\n      <td>10773.855355</td>\n      <td>99.70</td>\n      <td>99.60</td>\n      <td>99.90</td>\n    </tr>\n    <tr>\n      <td>2020-12-01</td>\n      <td>2819.000000</td>\n      <td>2038.000000</td>\n      <td>2186800.00</td>\n      <td>625600.00</td>\n      <td>84300.00</td>\n      <td>32165.22</td>\n      <td>6264.0</td>\n      <td>13406.000000</td>\n      <td>100.20</td>\n      <td>100.20</td>\n      <td>100.20</td>\n    </tr>\n    <tr>\n      <td>2020-11-01</td>\n      <td>2681.000000</td>\n      <td>1926.000000</td>\n      <td>2172000.00</td>\n      <td>618600.00</td>\n      <td>81600.00</td>\n      <td>31784.90</td>\n      <td>6264.0</td>\n      
<td>10956.000000</td>\n      <td>99.50</td>\n      <td>99.60</td>\n      <td>99.20</td>\n    </tr>\n    <tr>\n      <td>2020-10-01</td>\n      <td>2372.000000</td>\n      <td>1787.000000</td>\n      <td>2149700.00</td>\n      <td>609200.00</td>\n      <td>81000.00</td>\n      <td>31279.82</td>\n      <td>6264.0</td>\n      <td>17531.000000</td>\n      <td>100.50</td>\n      <td>100.50</td>\n      <td>100.40</td>\n    </tr>\n    <tr>\n      <td>2008-05-01</td>\n      <td>1205.000000</td>\n      <td>1003.000000</td>\n      <td>436221.60</td>\n      <td>153344.75</td>\n      <td>30169.30</td>\n      <td>17969.61</td>\n      <td>1929.0</td>\n      <td>6268.180000</td>\n      <td>107.72</td>\n      <td>107.33</td>\n      <td>108.55</td>\n    </tr>\n    <tr>\n      <td>2008-04-01</td>\n      <td>1187.000000</td>\n      <td>1020.000000</td>\n      <td>429313.72</td>\n      <td>151694.91</td>\n      <td>30789.61</td>\n      <td>17566.55</td>\n      <td>1929.0</td>\n      <td>6824.900000</td>\n      <td>108.48</td>\n      <td>108.11</td>\n      <td>109.27</td>\n    </tr>\n    <tr>\n      <td>2008-03-01</td>\n      <td>1090.000000</td>\n      <td>955.600000</td>\n      <td>423054.53</td>\n      <td>150867.47</td>\n      <td>30433.07</td>\n      <td>16821.77</td>\n      <td>1929.0</td>\n      <td>4416.000000</td>\n      <td>108.31</td>\n      <td>107.97</td>\n      <td>109.03</td>\n    </tr>\n    <tr>\n      <td>2008-02-01</td>\n      <td>873.700000</td>\n      <td>788.100000</td>\n      <td>421037.84</td>\n      <td>150177.88</td>\n      <td>32454.47</td>\n      <td>16471.34</td>\n      <td>1929.0</td>\n      <td>4158.800000</td>\n      <td>108.74</td>\n      <td>108.50</td>\n      <td>109.23</td>\n    </tr>\n    <tr>\n      <td>2008-01-01</td>\n      <td>1096.000000</td>\n      <td>901.700000</td>\n      <td>417846.17</td>\n      <td>154872.59</td>\n      <td>36673.15</td>\n      <td>15898.10</td>\n      <td>1929.0</td>\n      <td>7396.640000</td>\n      <td>107.08</td>\n   
   <td>106.78</td>\n      <td>107.72</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"execution_count":36}],"source":"dataset.head().append(dataset.tail())"},{"cell_type":"code","execution_count":37,"id":"automated-start","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"9733157E436A4C198C2DC7CCAAD03388","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"dataset.to_csv(\"./zhibiao.csv\")"},{"cell_type":"markdown","id":"combined-newman","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"4664D541E65143E5815B3745BBDA3110","trusted":true,"mdEditEnable":false},"source":"### 五、使用 tsfresh 特征处理\n对于宏观的国民经济数据，我们使用的是时间序列模型，主要针对机器学习中的树模型。因为是时序数据，其特征和寻常的机器学习特征略有不同，我们要关注时间特征、滞后特征、滑窗特征等。在这里我们根据所使用的模型来进行特征工程，所以选择了 tsfresh 库，这个库相当于时间序列特征化的瑞士军刀。tsfresh 能自动地计算出大量的时间序列特征，既包括峰数、平均值、最大值等简单特征，也包括时间反转对称统计量等更复杂的特征。该包还包含了特征重要性评估、特征选择的方法，因此，不管是基于时序数据的分类问题还是回归问题，tsfresh 都会是特征提取的一个不错的选择。"},{"cell_type":"code","execution_count":40,"id":"functional-yorkshire","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"FAF081F0E7EA45D78030C42EB64D05DF","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"stream","text":"Collecting tsfresh==0.17\n\u001b[?25l  Downloading https://files.pythonhosted.org/packages/35/b7/cbbfb02d50a93dbb710a730f168711eb343829e1cdea9f0d001d91aeefd6/tsfresh-0.17.0-py2.py3-none-any.whl (91kB)\n\u001b[K     |████████████████████████████████| 92kB 184kB/s eta 0:00:01\n\u001b[?25hRequirement already satisfied: patsy>=0.4.1 in /opt/conda/lib/python3.7/site-packages (from tsfresh==0.17) (0.5.1)\nRequirement already satisfied: scipy>=1.2.0 in /opt/conda/lib/python3.7/site-packages (from tsfresh==0.17) (1.3.1)\nRequirement already satisfied: statsmodels>=0.9.0 in /opt/conda/lib/python3.7/site-packages (from tsfresh==0.17) (0.10.1)\nRequirement already satisfied: numpy>=1.15.1 in /opt/conda/lib/python3.7/site-packages (from tsfresh==0.17) (1.17.2)\nRequirement already satisfied: 
pandas>=0.25.0 in /opt/conda/lib/python3.7/site-packages (from tsfresh==0.17) (0.25.1)\nCollecting dask[dataframe]>=2.9.0 (from tsfresh==0.17)\n\u001b[?25l  Downloading https://files.pythonhosted.org/packages/94/7c/f4b5259130ecdb0d2c7bc12b32eb8465b74e98b52c86f85de8b6ba5b112c/dask-2021.5.1-py3-none-any.whl (964kB)\n\u001b[K     |████████████████████████████████| 972kB 28kB/s eta 0:00:014     |█████████████▋                  | 409kB 442kB/s eta 0:00:02\n\u001b[?25hRequirement already satisfied: scikit-learn>=0.19.2 in /opt/conda/lib/python3.7/site-packages (from tsfresh==0.17) (0.21.3)\nRequirement already satisfied: tqdm>=4.10.0 in /opt/conda/lib/python3.7/site-packages (from tsfresh==0.17) (4.32.2)\nCollecting distributed>=2.11.0 (from tsfresh==0.17)\n\u001b[?25l  Downloading https://files.pythonhosted.org/packages/31/fa/92163eefde93e445db7b4946daccfcf825b95fe63e347807c55544e2686c/distributed-2021.5.1-py3-none-any.whl (705kB)\n\u001b[K     |████████████████████████████████| 706kB 65kB/s eta 0:00:013\n\u001b[?25hRequirement already satisfied: requests>=2.9.1 in /opt/conda/lib/python3.7/site-packages (from tsfresh==0.17) (2.22.0)\nRequirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from patsy>=0.4.1->tsfresh==0.17) (1.12.0)\nRequirement already satisfied: python-dateutil>=2.6.1 in /opt/conda/lib/python3.7/site-packages (from pandas>=0.25.0->tsfresh==0.17) (2.8.0)\nRequirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.7/site-packages (from pandas>=0.25.0->tsfresh==0.17) (2019.2)\nRequirement already satisfied: toolz>=0.8.2 in /opt/conda/lib/python3.7/site-packages (from dask[dataframe]>=2.9.0->tsfresh==0.17) (0.10.0)\nRequirement already satisfied: partd>=0.3.10 in /opt/conda/lib/python3.7/site-packages (from dask[dataframe]>=2.9.0->tsfresh==0.17) (1.0.0)\nRequirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from dask[dataframe]>=2.9.0->tsfresh==0.17) (5.1.2)\nCollecting fsspec>=0.6.0 (from 
dask[dataframe]>=2.9.0->tsfresh==0.17)\n\u001b[?25l  Downloading https://files.pythonhosted.org/packages/bc/52/816d1a3a599176057bf29dfacb1f8fadb61d35fbd96cb1bab4aaa7df83c0/fsspec-2021.5.0-py3-none-any.whl (111kB)\n\u001b[K     |████████████████████████████████| 112kB 43kB/s eta 0:00:01\n\u001b[?25hRequirement already satisfied: cloudpickle>=1.1.1 in /opt/conda/lib/python3.7/site-packages (from dask[dataframe]>=2.9.0->tsfresh==0.17) (1.2.2)\nRequirement already satisfied: joblib>=0.11 in /opt/conda/lib/python3.7/site-packages (from scikit-learn>=0.19.2->tsfresh==0.17) (0.13.2)\nRequirement already satisfied: click>=6.6 in /opt/conda/lib/python3.7/site-packages (from distributed>=2.11.0->tsfresh==0.17) (7.0)\nRequirement already satisfied: msgpack>=0.6.0 in /opt/conda/lib/python3.7/site-packages (from distributed>=2.11.0->tsfresh==0.17) (0.6.1)\nRequirement already satisfied: setuptools in /opt/conda/lib/python3.7/site-packages (from distributed>=2.11.0->tsfresh==0.17) (41.0.1)\nCollecting tblib>=1.6.0 (from distributed>=2.11.0->tsfresh==0.17)\n  Downloading https://files.pythonhosted.org/packages/f8/cd/2fad4add11c8837e72f50a30e2bda30e67a10d70462f826b291443a55c7d/tblib-1.7.0-py2.py3-none-any.whl\nRequirement already satisfied: tornado>=5; python_version < \"3.8\" in /opt/conda/lib/python3.7/site-packages (from distributed>=2.11.0->tsfresh==0.17) (6.0.3)\nRequirement already satisfied: psutil>=5.0 in /opt/conda/lib/python3.7/site-packages (from distributed>=2.11.0->tsfresh==0.17) (5.6.3)\nRequirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /opt/conda/lib/python3.7/site-packages (from distributed>=2.11.0->tsfresh==0.17) (2.1.0)\nRequirement already satisfied: zict>=0.1.3 in /opt/conda/lib/python3.7/site-packages (from distributed>=2.11.0->tsfresh==0.17) (1.0.0)\nRequirement already satisfied: idna<2.9,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests>=2.9.1->tsfresh==0.17) (2.8)\nRequirement already satisfied: certifi>=2017.4.17 in 
/opt/conda/lib/python3.7/site-packages (from requests>=2.9.1->tsfresh==0.17) (2019.6.16)\nRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests>=2.9.1->tsfresh==0.17) (1.25.3)\nRequirement already satisfied: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests>=2.9.1->tsfresh==0.17) (3.0.4)\nRequirement already satisfied: locket in /opt/conda/lib/python3.7/site-packages (from partd>=0.3.10->dask[dataframe]>=2.9.0->tsfresh==0.17) (0.2.0)\nRequirement already satisfied: heapdict in /opt/conda/lib/python3.7/site-packages (from zict>=0.1.3->distributed>=2.11.0->tsfresh==0.17) (1.0.0)\n\u001b[31mERROR: distributed 2021.5.1 has requirement cloudpickle>=1.5.0, but you'll have cloudpickle 1.2.2 which is incompatible.\u001b[0m\nInstalling collected packages: fsspec, dask, tblib, distributed, tsfresh\n  Found existing installation: fsspec 0.4.4\n    Uninstalling fsspec-0.4.4:\n      Successfully uninstalled fsspec-0.4.4\n  Found existing installation: dask 2.2.0\n    Uninstalling dask-2.2.0:\n      Successfully uninstalled dask-2.2.0\n  Found existing installation: tblib 1.4.0\n    Uninstalling tblib-1.4.0:\n      Successfully uninstalled tblib-1.4.0\n  Found existing installation: distributed 2.3.2\n    Uninstalling distributed-2.3.2:\n      Successfully uninstalled distributed-2.3.2\nSuccessfully installed dask-2021.5.1 distributed-2021.5.1 fsspec-2021.5.0 tblib-1.7.0 tsfresh-0.17.0\nNote: you may need to restart the kernel to use updated packages.\n","name":"stdout"}],"source":"pip install tsfresh==0.17"},{"metadata":{"id":"E6929E325CB4442D82A1C5322B7AF3AB","notebookId":"60b34773e77c4200173bea40","jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"trusted":true,"collapsed":false,"scrolled":false},"cell_type":"code","outputs":[],"source":"# 导入需要的tsfresh进行特征化,如果没有tsfresh库的话可以pip install tsfresh==0.17(最新版本和numba冲突)\nimport tsfresh as tsf\nfrom tsfresh import 
extract_features, select_features\nfrom tsfresh.utilities.dataframe_functions import impute\nlabels=[\"Current_Export\",\"Current_Import\",\"M2\",\"M1\",\"M0\",\"FE_Reserve\",\"Gold_Reserve\",\"Fis_Current_Month_Value\",\"Nation_Current_Month\",\"City_Current_Month\",\"Country_Current_Month\"]","execution_count":41},{"cell_type":"code","execution_count":42,"id":"overall-harris","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"E11AB491809A43E89C99E83D0D34AC9D","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"dataset1=dataset.reset_index()"},{"cell_type":"code","execution_count":43,"id":"vietnamese-january","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"998B8858709440799198711E07C4E742","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"fe_da=dataset1[[\"Month\",labels[0]]]"},{"cell_type":"code","execution_count":44,"id":"vanilla-relevance","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"8082FA588A694C9D84E4BB570E1E43A2","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  8.74it/s]\n","name":"stderr"},{"output_type":"execute_result","metadata":{},"data":{"text/plain":"            Current_Export__variance_larger_than_standard_deviation  \\\n2008-01-01                                                0.0         \n2008-02-01                                                0.0         \n2008-03-01                                                0.0         \n2008-04-01                                                0.0         \n2008-05-01                                                0.0         \n...                                                       ...         
\n2020-10-01                                                0.0         \n2020-11-01                                                0.0         \n2020-12-01                                                0.0         \n2021-01-01                                                0.0         \n2021-02-01                                                0.0         \n\n            Current_Export__has_duplicate_max  \\\n2008-01-01                                0.0   \n2008-02-01                                0.0   \n2008-03-01                                0.0   \n2008-04-01                                0.0   \n2008-05-01                                0.0   \n...                                       ...   \n2020-10-01                                0.0   \n2020-11-01                                0.0   \n2020-12-01                                0.0   \n2021-01-01                                0.0   \n2021-02-01                                0.0   \n\n            Current_Export__has_duplicate_min  Current_Export__has_duplicate  \\\n2008-01-01                                0.0                            0.0   \n2008-02-01                                0.0                            0.0   \n2008-03-01                                0.0                            0.0   \n2008-04-01                                0.0                            0.0   \n2008-05-01                                0.0                            0.0   \n...                                       ...                            ...   
\n2020-10-01                                0.0                            0.0   \n2020-11-01                                0.0                            0.0   \n2020-12-01                                0.0                            0.0   \n2021-01-01                                0.0                            0.0   \n2021-02-01                                0.0                            0.0   \n\n            Current_Export__sum_values  Current_Export__abs_energy  \\\n2008-01-01                 1096.000000                1.201216e+06   \n2008-02-01                  873.700000                7.633517e+05   \n2008-03-01                 1090.000000                1.188100e+06   \n2008-04-01                 1187.000000                1.408969e+06   \n2008-05-01                 1205.000000                1.452025e+06   \n...                                ...                         ...   \n2020-10-01                 2372.000000                5.626384e+06   \n2020-11-01                 2681.000000                7.187761e+06   \n2020-12-01                 2819.000000                7.946761e+06   \n2021-01-01                 1735.477922                3.011884e+06   \n2021-02-01                 1735.477922                3.011884e+06   \n\n            Current_Export__mean_abs_change  Current_Export__mean_change  \\\n2008-01-01                              NaN                          NaN   \n2008-02-01                              NaN                          NaN   \n2008-03-01                              NaN                          NaN   \n2008-04-01                              NaN                          NaN   \n2008-05-01                              NaN                          NaN   \n...                                     ...                          ...   
\n2020-10-01                              NaN                          NaN   \n2020-11-01                              NaN                          NaN   \n2020-12-01                              NaN                          NaN   \n2021-01-01                              NaN                          NaN   \n2021-02-01                              NaN                          NaN   \n\n            Current_Export__mean_second_derivative_central  \\\n2008-01-01                                             NaN   \n2008-02-01                                             NaN   \n2008-03-01                                             NaN   \n2008-04-01                                             NaN   \n2008-05-01                                             NaN   \n...                                                    ...   \n2020-10-01                                             NaN   \n2020-11-01                                             NaN   \n2020-12-01                                             NaN   \n2021-01-01                                             NaN   \n2021-02-01                                             NaN   \n\n            Current_Export__median  ...  \\\n2008-01-01             1096.000000  ...   \n2008-02-01              873.700000  ...   \n2008-03-01             1090.000000  ...   \n2008-04-01             1187.000000  ...   \n2008-05-01             1205.000000  ...   \n...                            ...  ...   \n2020-10-01             2372.000000  ...   \n2020-11-01             2681.000000  ...   \n2020-12-01             2819.000000  ...   \n2021-01-01             1735.477922  ...   \n2021-02-01             1735.477922  ...   
\n\n            Current_Export__fourier_entropy__bins_2  \\\n2008-01-01                                      NaN   \n2008-02-01                                      NaN   \n2008-03-01                                      NaN   \n2008-04-01                                      NaN   \n2008-05-01                                      NaN   \n...                                             ...   \n2020-10-01                                      NaN   \n2020-11-01                                      NaN   \n2020-12-01                                      NaN   \n2021-01-01                                      NaN   \n2021-02-01                                      NaN   \n\n            Current_Export__fourier_entropy__bins_3  \\\n2008-01-01                                      NaN   \n2008-02-01                                      NaN   \n2008-03-01                                      NaN   \n2008-04-01                                      NaN   \n2008-05-01                                      NaN   \n...                                             ...   \n2020-10-01                                      NaN   \n2020-11-01                                      NaN   \n2020-12-01                                      NaN   \n2021-01-01                                      NaN   \n2021-02-01                                      NaN   \n\n            Current_Export__fourier_entropy__bins_5  \\\n2008-01-01                                      NaN   \n2008-02-01                                      NaN   \n2008-03-01                                      NaN   \n2008-04-01                                      NaN   \n2008-05-01                                      NaN   \n...                                             ...   
\n2020-10-01                                      NaN   \n2020-11-01                                      NaN   \n2020-12-01                                      NaN   \n2021-01-01                                      NaN   \n2021-02-01                                      NaN   \n\n            Current_Export__fourier_entropy__bins_10  \\\n2008-01-01                                       NaN   \n2008-02-01                                       NaN   \n2008-03-01                                       NaN   \n2008-04-01                                       NaN   \n2008-05-01                                       NaN   \n...                                              ...   \n2020-10-01                                       NaN   \n2020-11-01                                       NaN   \n2020-12-01                                       NaN   \n2021-01-01                                       NaN   \n2021-02-01                                       NaN   \n\n            Current_Export__fourier_entropy__bins_100  \\\n2008-01-01                                        NaN   \n2008-02-01                                        NaN   \n2008-03-01                                        NaN   \n2008-04-01                                        NaN   \n2008-05-01                                        NaN   \n...                                               ...   
\n2020-10-01                                        NaN   \n2020-11-01                                        NaN   \n2020-12-01                                        NaN   \n2021-01-01                                        NaN   \n2021-02-01                                        NaN   \n\n            Current_Export__permutation_entropy__dimension_3__tau_1  \\\n2008-01-01                                                NaN         \n2008-02-01                                                NaN         \n2008-03-01                                                NaN         \n2008-04-01                                                NaN         \n2008-05-01                                                NaN         \n...                                                       ...         \n2020-10-01                                                NaN         \n2020-11-01                                                NaN         \n2020-12-01                                                NaN         \n2021-01-01                                                NaN         \n2021-02-01                                                NaN         \n\n            Current_Export__permutation_entropy__dimension_4__tau_1  \\\n2008-01-01                                                NaN         \n2008-02-01                                                NaN         \n2008-03-01                                                NaN         \n2008-04-01                                                NaN         \n2008-05-01                                                NaN         \n...                                                       ...         
\n2020-10-01                                                NaN         \n2020-11-01                                                NaN         \n2020-12-01                                                NaN         \n2021-01-01                                                NaN         \n2021-02-01                                                NaN         \n\n            Current_Export__permutation_entropy__dimension_5__tau_1  \\\n2008-01-01                                                NaN         \n2008-02-01                                                NaN         \n2008-03-01                                                NaN         \n2008-04-01                                                NaN         \n2008-05-01                                                NaN         \n...                                                       ...         \n2020-10-01                                                NaN         \n2020-11-01                                                NaN         \n2020-12-01                                                NaN         \n2021-01-01                                                NaN         \n2021-02-01                                                NaN         \n\n            Current_Export__permutation_entropy__dimension_6__tau_1  \\\n2008-01-01                                                NaN         \n2008-02-01                                                NaN         \n2008-03-01                                                NaN         \n2008-04-01                                                NaN         \n2008-05-01                                                NaN         \n...                                                       ...         
\n2020-10-01                                                NaN         \n2020-11-01                                                NaN         \n2020-12-01                                                NaN         \n2021-01-01                                                NaN         \n2021-02-01                                                NaN         \n\n            Current_Export__permutation_entropy__dimension_7__tau_1  \n2008-01-01                                                NaN        \n2008-02-01                                                NaN        \n2008-03-01                                                NaN        \n2008-04-01                                                NaN        \n2008-05-01                                                NaN        \n...                                                       ...        \n2020-10-01                                                NaN        \n2020-11-01                                                NaN        \n2020-12-01                                                NaN        \n2021-01-01                                                NaN        \n2021-02-01                                                NaN        \n\n[158 rows x 779 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Current_Export__variance_larger_than_standard_deviation</th>\n      <th>Current_Export__has_duplicate_max</th>\n      <th>Current_Export__has_duplicate_min</th>\n      <th>Current_Export__has_duplicate</th>\n      <th>Current_Export__sum_values</th>\n      <th>Current_Export__abs_energy</th>\n      <th>Current_Export__mean_abs_change</th>\n      
<th>Current_Export__mean_change</th>\n      <th>Current_Export__mean_second_derivative_central</th>\n      <th>Current_Export__median</th>\n      <th>...</th>\n      <th>Current_Export__fourier_entropy__bins_2</th>\n      <th>Current_Export__fourier_entropy__bins_3</th>\n      <th>Current_Export__fourier_entropy__bins_5</th>\n      <th>Current_Export__fourier_entropy__bins_10</th>\n      <th>Current_Export__fourier_entropy__bins_100</th>\n      <th>Current_Export__permutation_entropy__dimension_3__tau_1</th>\n      <th>Current_Export__permutation_entropy__dimension_4__tau_1</th>\n      <th>Current_Export__permutation_entropy__dimension_5__tau_1</th>\n      <th>Current_Export__permutation_entropy__dimension_6__tau_1</th>\n      <th>Current_Export__permutation_entropy__dimension_7__tau_1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>2008-01-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1096.000000</td>\n      <td>1.201216e+06</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>1096.000000</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2008-02-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>873.700000</td>\n      <td>7.633517e+05</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>873.700000</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2008-03-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1090.000000</td>\n      <td>1.188100e+06</td>\n     
 <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>1090.000000</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2008-04-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1187.000000</td>\n      <td>1.408969e+06</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>1187.000000</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2008-05-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1205.000000</td>\n      <td>1.452025e+06</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>1205.000000</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <td>2020-10-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>2372.000000</td>\n      <td>5.626384e+06</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n     
 <td>2372.000000</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2020-11-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>2681.000000</td>\n      <td>7.187761e+06</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2681.000000</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2020-12-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>2819.000000</td>\n      <td>7.946761e+06</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>2819.000000</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2021-01-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1735.477922</td>\n      <td>3.011884e+06</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>1735.477922</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <td>2021-02-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1735.477922</td>\n      <td>3.011884e+06</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>1735.477922</td>\n     
 <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n<p>158 rows × 779 columns</p>\n</div>"},"execution_count":44}],"source":"# 特征提取\ntrain_features = extract_features(fe_da, column_id='Month')\ntrain_features"},{"cell_type":"code","execution_count":45,"id":"modern-masters","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"7542837E65394A488E96E55E91B5AE93","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"from tsfresh.utilities.dataframe_functions import impute\nimport warnings\nwarnings.filterwarnings(\"ignore\")\n# 去除抽取特征中的NaN值\ntrain_features=impute(train_features)"},{"cell_type":"code","execution_count":46,"id":"trying-service","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"069AD26EE3C04A02A52B5DED5C00EE47","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"            Current_Export__variance_larger_than_standard_deviation  \\\n2008-01-01                                                0.0         \n2008-02-01                                                0.0         \n2008-03-01                                                0.0         \n2008-04-01                                                0.0         \n2008-05-01                                                0.0         \n...                                                       ...         
\n2020-10-01                                                0.0         \n2020-11-01                                                0.0         \n2020-12-01                                                0.0         \n2021-01-01                                                0.0         \n2021-02-01                                                0.0         \n\n            Current_Export__has_duplicate_max  \\\n2008-01-01                                0.0   \n2008-02-01                                0.0   \n2008-03-01                                0.0   \n2008-04-01                                0.0   \n2008-05-01                                0.0   \n...                                       ...   \n2020-10-01                                0.0   \n2020-11-01                                0.0   \n2020-12-01                                0.0   \n2021-01-01                                0.0   \n2021-02-01                                0.0   \n\n            Current_Export__has_duplicate_min  Current_Export__has_duplicate  \\\n2008-01-01                                0.0                            0.0   \n2008-02-01                                0.0                            0.0   \n2008-03-01                                0.0                            0.0   \n2008-04-01                                0.0                            0.0   \n2008-05-01                                0.0                            0.0   \n...                                       ...                            ...   
\n2020-10-01                                0.0                            0.0   \n2020-11-01                                0.0                            0.0   \n2020-12-01                                0.0                            0.0   \n2021-01-01                                0.0                            0.0   \n2021-02-01                                0.0                            0.0   \n\n            Current_Export__sum_values  Current_Export__abs_energy  \\\n2008-01-01                 1096.000000                1.201216e+06   \n2008-02-01                  873.700000                7.633517e+05   \n2008-03-01                 1090.000000                1.188100e+06   \n2008-04-01                 1187.000000                1.408969e+06   \n2008-05-01                 1205.000000                1.452025e+06   \n...                                ...                         ...   \n2020-10-01                 2372.000000                5.626384e+06   \n2020-11-01                 2681.000000                7.187761e+06   \n2020-12-01                 2819.000000                7.946761e+06   \n2021-01-01                 1735.477922                3.011884e+06   \n2021-02-01                 1735.477922                3.011884e+06   \n\n            Current_Export__mean_abs_change  Current_Export__mean_change  \\\n2008-01-01                              0.0                          0.0   \n2008-02-01                              0.0                          0.0   \n2008-03-01                              0.0                          0.0   \n2008-04-01                              0.0                          0.0   \n2008-05-01                              0.0                          0.0   \n...                                     ...                          ...   
\n2020-10-01                              0.0                          0.0   \n2020-11-01                              0.0                          0.0   \n2020-12-01                              0.0                          0.0   \n2021-01-01                              0.0                          0.0   \n2021-02-01                              0.0                          0.0   \n\n            Current_Export__mean_second_derivative_central  \\\n2008-01-01                                             0.0   \n2008-02-01                                             0.0   \n2008-03-01                                             0.0   \n2008-04-01                                             0.0   \n2008-05-01                                             0.0   \n...                                                    ...   \n2020-10-01                                             0.0   \n2020-11-01                                             0.0   \n2020-12-01                                             0.0   \n2021-01-01                                             0.0   \n2021-02-01                                             0.0   \n\n            Current_Export__median  ...  \\\n2008-01-01             1096.000000  ...   \n2008-02-01              873.700000  ...   \n2008-03-01             1090.000000  ...   \n2008-04-01             1187.000000  ...   \n2008-05-01             1205.000000  ...   \n...                            ...  ...   \n2020-10-01             2372.000000  ...   \n2020-11-01             2681.000000  ...   \n2020-12-01             2819.000000  ...   \n2021-01-01             1735.477922  ...   \n2021-02-01             1735.477922  ...   
\n\n            Current_Export__fourier_entropy__bins_2  \\\n2008-01-01                                      0.0   \n2008-02-01                                      0.0   \n2008-03-01                                      0.0   \n2008-04-01                                      0.0   \n2008-05-01                                      0.0   \n...                                             ...   \n2020-10-01                                      0.0   \n2020-11-01                                      0.0   \n2020-12-01                                      0.0   \n2021-01-01                                      0.0   \n2021-02-01                                      0.0   \n\n            Current_Export__fourier_entropy__bins_3  \\\n2008-01-01                                      0.0   \n2008-02-01                                      0.0   \n2008-03-01                                      0.0   \n2008-04-01                                      0.0   \n2008-05-01                                      0.0   \n...                                             ...   \n2020-10-01                                      0.0   \n2020-11-01                                      0.0   \n2020-12-01                                      0.0   \n2021-01-01                                      0.0   \n2021-02-01                                      0.0   \n\n            Current_Export__fourier_entropy__bins_5  \\\n2008-01-01                                      0.0   \n2008-02-01                                      0.0   \n2008-03-01                                      0.0   \n2008-04-01                                      0.0   \n2008-05-01                                      0.0   \n...                                             ...   
\n2020-10-01                                      0.0   \n2020-11-01                                      0.0   \n2020-12-01                                      0.0   \n2021-01-01                                      0.0   \n2021-02-01                                      0.0   \n\n            Current_Export__fourier_entropy__bins_10  \\\n2008-01-01                                       0.0   \n2008-02-01                                       0.0   \n2008-03-01                                       0.0   \n2008-04-01                                       0.0   \n2008-05-01                                       0.0   \n...                                              ...   \n2020-10-01                                       0.0   \n2020-11-01                                       0.0   \n2020-12-01                                       0.0   \n2021-01-01                                       0.0   \n2021-02-01                                       0.0   \n\n            Current_Export__fourier_entropy__bins_100  \\\n2008-01-01                                        0.0   \n2008-02-01                                        0.0   \n2008-03-01                                        0.0   \n2008-04-01                                        0.0   \n2008-05-01                                        0.0   \n...                                               ...   
\n2020-10-01                                        0.0   \n2020-11-01                                        0.0   \n2020-12-01                                        0.0   \n2021-01-01                                        0.0   \n2021-02-01                                        0.0   \n\n            Current_Export__permutation_entropy__dimension_3__tau_1  \\\n2008-01-01                                                0.0         \n2008-02-01                                                0.0         \n2008-03-01                                                0.0         \n2008-04-01                                                0.0         \n2008-05-01                                                0.0         \n...                                                       ...         \n2020-10-01                                                0.0         \n2020-11-01                                                0.0         \n2020-12-01                                                0.0         \n2021-01-01                                                0.0         \n2021-02-01                                                0.0         \n\n            Current_Export__permutation_entropy__dimension_4__tau_1  \\\n2008-01-01                                                0.0         \n2008-02-01                                                0.0         \n2008-03-01                                                0.0         \n2008-04-01                                                0.0         \n2008-05-01                                                0.0         \n...                                                       ...         
\n2020-10-01                                                0.0         \n2020-11-01                                                0.0         \n2020-12-01                                                0.0         \n2021-01-01                                                0.0         \n2021-02-01                                                0.0         \n\n            Current_Export__permutation_entropy__dimension_5__tau_1  \\\n2008-01-01                                                0.0         \n2008-02-01                                                0.0         \n2008-03-01                                                0.0         \n2008-04-01                                                0.0         \n2008-05-01                                                0.0         \n...                                                       ...         \n2020-10-01                                                0.0         \n2020-11-01                                                0.0         \n2020-12-01                                                0.0         \n2021-01-01                                                0.0         \n2021-02-01                                                0.0         \n\n            Current_Export__permutation_entropy__dimension_6__tau_1  \\\n2008-01-01                                                0.0         \n2008-02-01                                                0.0         \n2008-03-01                                                0.0         \n2008-04-01                                                0.0         \n2008-05-01                                                0.0         \n...                                                       ...         
\n2020-10-01                                                0.0         \n2020-11-01                                                0.0         \n2020-12-01                                                0.0         \n2021-01-01                                                0.0         \n2021-02-01                                                0.0         \n\n            Current_Export__permutation_entropy__dimension_7__tau_1  \n2008-01-01                                                0.0        \n2008-02-01                                                0.0        \n2008-03-01                                                0.0        \n2008-04-01                                                0.0        \n2008-05-01                                                0.0        \n...                                                       ...        \n2020-10-01                                                0.0        \n2020-11-01                                                0.0        \n2020-12-01                                                0.0        \n2021-01-01                                                0.0        \n2021-02-01                                                0.0        \n\n[158 rows x 779 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Current_Export__variance_larger_than_standard_deviation</th>\n      <th>Current_Export__has_duplicate_max</th>\n      <th>Current_Export__has_duplicate_min</th>\n      <th>Current_Export__has_duplicate</th>\n      <th>Current_Export__sum_values</th>\n      <th>Current_Export__abs_energy</th>\n      <th>Current_Export__mean_abs_change</th>\n      
<th>Current_Export__mean_change</th>\n      <th>Current_Export__mean_second_derivative_central</th>\n      <th>Current_Export__median</th>\n      <th>...</th>\n      <th>Current_Export__fourier_entropy__bins_2</th>\n      <th>Current_Export__fourier_entropy__bins_3</th>\n      <th>Current_Export__fourier_entropy__bins_5</th>\n      <th>Current_Export__fourier_entropy__bins_10</th>\n      <th>Current_Export__fourier_entropy__bins_100</th>\n      <th>Current_Export__permutation_entropy__dimension_3__tau_1</th>\n      <th>Current_Export__permutation_entropy__dimension_4__tau_1</th>\n      <th>Current_Export__permutation_entropy__dimension_5__tau_1</th>\n      <th>Current_Export__permutation_entropy__dimension_6__tau_1</th>\n      <th>Current_Export__permutation_entropy__dimension_7__tau_1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>2008-01-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1096.000000</td>\n      <td>1.201216e+06</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1096.000000</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>2008-02-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>873.700000</td>\n      <td>7.633517e+05</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>873.700000</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>2008-03-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1090.000000</td>\n      <td>1.188100e+06</td>\n     
 <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1090.000000</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>2008-04-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1187.000000</td>\n      <td>1.408969e+06</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1187.000000</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>2008-05-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1205.000000</td>\n      <td>1.452025e+06</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1205.000000</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <td>2020-10-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>2372.000000</td>\n      <td>5.626384e+06</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n     
 <td>2372.000000</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>2020-11-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>2681.000000</td>\n      <td>7.187761e+06</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>2681.000000</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>2020-12-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>2819.000000</td>\n      <td>7.946761e+06</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>2819.000000</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>2021-01-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1735.477922</td>\n      <td>3.011884e+06</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1735.477922</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>2021-02-01</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1735.477922</td>\n      <td>3.011884e+06</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1735.477922</td>\n     
 <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>158 rows × 779 columns</p>\n</div>"},"execution_count":46}],"source":"train_features"},{"cell_type":"markdown","id":"wireless-milwaukee","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"976ABBB3A18242FB8749406B83E6798A","trusted":true,"mdEditEnable":false},"source":"接下来，按照特征和响应变量之间的相关性进行特征选择，这一过程包含两步：首先单独计算每个特征和响应变量之间的相关性，然后利用 Benjamini-Yekutieli procedure 进行特征选择，决定哪些特征可以被保留。\n"},{"cell_type":"code","execution_count":47,"id":"level-cache","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"EFA38FC0CCAB472285BDB2514BC112EB","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"data_y=pd.DataFrame(fe_da).set_index(\"Month\")"},{"cell_type":"code","execution_count":48,"id":"documentary-fleece","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"B36678456C1944868A7A9C81EFA2C64C","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"            Current_Export__sum_values  \\\n2008-01-01                 1096.000000   \n2008-02-01                  873.700000   \n2008-03-01                 1090.000000   \n2008-04-01                 1187.000000   \n2008-05-01                 1205.000000   \n...                                ...   
\n2020-10-01                 2372.000000   \n2020-11-01                 2681.000000   \n2020-12-01                 2819.000000   \n2021-01-01                 1735.477922   \n2021-02-01                 1735.477922   \n\n            Current_Export__cwt_coefficients__coeff_0__w_20__widths_(2, 5, 10, 20)  \\\n2008-01-01                                         212.558001                        \n2008-02-01                                         169.445187                        \n2008-03-01                                         211.394362                        \n2008-04-01                                         230.206521                        \n2008-05-01                                         233.697437                        \n...                                                       ...                        \n2020-10-01                                         460.025162                        \n2020-11-01                                         519.952555                        \n2020-12-01                                         546.716244                        \n2021-01-01                                         336.578209                        \n2021-02-01                                         336.578209                        \n\n            Current_Export__cwt_coefficients__coeff_0__w_10__widths_(2, 5, 10, 20)  \\\n2008-01-01                                         300.602407                        \n2008-02-01                                         239.631682                        \n2008-03-01                                         298.956774                        \n2008-04-01                                         325.561184                        \n2008-05-01                                         330.498085                        \n...                                                       ...                        
\n2020-10-01                                         650.573823                        \n2020-11-01                                         735.323954                        \n2020-12-01                                         773.173528                        \n2021-01-01                                         475.993468                        \n2021-02-01                                         475.993468                        \n\n            Current_Export__cwt_coefficients__coeff_0__w_5__widths_(2, 5, 10, 20)  \\\n2008-01-01                                         425.116001                       \n2008-02-01                                         338.890374                       \n2008-03-01                                         422.788724                       \n2008-04-01                                         460.413042                       \n2008-05-01                                         467.394874                       \n...                                                       ...                       \n2020-10-01                                         920.050324                       \n2020-11-01                                        1039.905109                       \n2020-12-01                                        1093.432489                       \n2021-01-01                                         673.156419                       \n2021-02-01                                         673.156419                       \n\n            Current_Export__cwt_coefficients__coeff_0__w_2__widths_(2, 5, 10, 20)  \\\n2008-01-01                                         672.167417                       \n2008-02-01                                         535.832730                       \n2008-03-01                                         668.487668                       \n2008-04-01                                         727.976938                       \n2008-05-01                                         739.016184                       \n...      
                                                 ...                       \n2020-10-01                                        1454.727293                       \n2020-11-01                                        1644.234348                       \n2020-12-01                                        1728.868566                       \n2021-01-01                                        1064.353752                       \n2021-02-01                                        1064.353752                       \n\n            Current_Export__quantile__q_0.9  Current_Export__quantile__q_0.8  \\\n2008-01-01                      1096.000000                      1096.000000   \n2008-02-01                       873.700000                       873.700000   \n2008-03-01                      1090.000000                      1090.000000   \n2008-04-01                      1187.000000                      1187.000000   \n2008-05-01                      1205.000000                      1205.000000   \n...                                     ...                              ...   \n2020-10-01                      2372.000000                      2372.000000   \n2020-11-01                      2681.000000                      2681.000000   \n2020-12-01                      2819.000000                      2819.000000   \n2021-01-01                      1735.477922                      1735.477922   \n2021-02-01                      1735.477922                      1735.477922   \n\n            Current_Export__quantile__q_0.7  Current_Export__quantile__q_0.6  \\\n2008-01-01                      1096.000000                      1096.000000   \n2008-02-01                       873.700000                       873.700000   \n2008-03-01                      1090.000000                      1090.000000   \n2008-04-01                      1187.000000                      1187.000000   \n2008-05-01                      1205.000000                      1205.000000   \n...                          
           ...                              ...   \n2020-10-01                      2372.000000                      2372.000000   \n2020-11-01                      2681.000000                      2681.000000   \n2020-12-01                      2819.000000                      2819.000000   \n2021-01-01                      1735.477922                      1735.477922   \n2021-02-01                      1735.477922                      1735.477922   \n\n            Current_Export__fft_coefficient__attr_\"real\"__coeff_0  ...  \\\n2008-01-01                                        1096.000000      ...   \n2008-02-01                                         873.700000      ...   \n2008-03-01                                        1090.000000      ...   \n2008-04-01                                        1187.000000      ...   \n2008-05-01                                        1205.000000      ...   \n...                                                       ...      ...   \n2020-10-01                                        2372.000000      ...   \n2020-11-01                                        2681.000000      ...   \n2020-12-01                                        2819.000000      ...   \n2021-01-01                                        1735.477922      ...   \n2021-02-01                                        1735.477922      ...   \n\n            Current_Export__quantile__q_0.2  Current_Export__quantile__q_0.1  \\\n2008-01-01                      1096.000000                      1096.000000   \n2008-02-01                       873.700000                       873.700000   \n2008-03-01                      1090.000000                      1090.000000   \n2008-04-01                      1187.000000                      1187.000000   \n2008-05-01                      1205.000000                      1205.000000   \n...                                     ...                              ...   
\n2020-10-01                      2372.000000                      2372.000000   \n2020-11-01                      2681.000000                      2681.000000   \n2020-12-01                      2819.000000                      2819.000000   \n2021-01-01                      1735.477922                      1735.477922   \n2021-02-01                      1735.477922                      1735.477922   \n\n            Current_Export__minimum  Current_Export__maximum  \\\n2008-01-01              1096.000000              1096.000000   \n2008-02-01               873.700000               873.700000   \n2008-03-01              1090.000000              1090.000000   \n2008-04-01              1187.000000              1187.000000   \n2008-05-01              1205.000000              1205.000000   \n...                             ...                      ...   \n2020-10-01              2372.000000              2372.000000   \n2020-11-01              2681.000000              2681.000000   \n2020-12-01              2819.000000              2819.000000   \n2021-01-01              1735.477922              1735.477922   \n2021-02-01              1735.477922              1735.477922   \n\n            Current_Export__mean  Current_Export__median  \\\n2008-01-01           1096.000000             1096.000000   \n2008-02-01            873.700000              873.700000   \n2008-03-01           1090.000000             1090.000000   \n2008-04-01           1187.000000             1187.000000   \n2008-05-01           1205.000000             1205.000000   \n...                          ...                     ...   
\n2020-10-01           2372.000000             2372.000000   \n2020-11-01           2681.000000             2681.000000   \n2020-12-01           2819.000000             2819.000000   \n2021-01-01           1735.477922             1735.477922   \n2021-02-01           1735.477922             1735.477922   \n\n            Current_Export__abs_energy  Current_Export__quantile__q_0.3  \\\n2008-01-01                1.201216e+06                      1096.000000   \n2008-02-01                7.633517e+05                       873.700000   \n2008-03-01                1.188100e+06                      1090.000000   \n2008-04-01                1.408969e+06                      1187.000000   \n2008-05-01                1.452025e+06                      1205.000000   \n...                                ...                              ...   \n2020-10-01                5.626384e+06                      2372.000000   \n2020-11-01                7.187761e+06                      2681.000000   \n2020-12-01                7.946761e+06                      2819.000000   \n2021-01-01                3.011884e+06                      1735.477922   \n2021-02-01                3.011884e+06                      1735.477922   \n\n            Current_Export__fft_coefficient__attr_\"abs\"__coeff_0  \\\n2008-01-01                                        1096.000000      \n2008-02-01                                         873.700000      \n2008-03-01                                        1090.000000      \n2008-04-01                                        1187.000000      \n2008-05-01                                        1205.000000      \n...                                                       ...      
\n2020-10-01                                        2372.000000      \n2020-11-01                                        2681.000000      \n2020-12-01                                        2819.000000      \n2021-01-01                                        1735.477922      \n2021-02-01                                        1735.477922      \n\n            Current_Export__benford_correlation  \n2008-01-01                             0.864123  \n2008-02-01                            -0.272809  \n2008-03-01                             0.864123  \n2008-04-01                             0.864123  \n2008-05-01                             0.864123  \n...                                         ...  \n2020-10-01                             0.295657  \n2020-11-01                             0.295657  \n2020-12-01                             0.295657  \n2021-01-01                             0.864123  \n2021-02-01                             0.864123  \n\n[158 rows x 21 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Current_Export__sum_values</th>\n      <th>Current_Export__cwt_coefficients__coeff_0__w_20__widths_(2, 5, 10, 20)</th>\n      <th>Current_Export__cwt_coefficients__coeff_0__w_10__widths_(2, 5, 10, 20)</th>\n      <th>Current_Export__cwt_coefficients__coeff_0__w_5__widths_(2, 5, 10, 20)</th>\n      <th>Current_Export__cwt_coefficients__coeff_0__w_2__widths_(2, 5, 10, 20)</th>\n      <th>Current_Export__quantile__q_0.9</th>\n      <th>Current_Export__quantile__q_0.8</th>\n      <th>Current_Export__quantile__q_0.7</th>\n      <th>Current_Export__quantile__q_0.6</th>\n      
<th>Current_Export__fft_coefficient__attr_\"real\"__coeff_0</th>\n      <th>...</th>\n      <th>Current_Export__quantile__q_0.2</th>\n      <th>Current_Export__quantile__q_0.1</th>\n      <th>Current_Export__minimum</th>\n      <th>Current_Export__maximum</th>\n      <th>Current_Export__mean</th>\n      <th>Current_Export__median</th>\n      <th>Current_Export__abs_energy</th>\n      <th>Current_Export__quantile__q_0.3</th>\n      <th>Current_Export__fft_coefficient__attr_\"abs\"__coeff_0</th>\n      <th>Current_Export__benford_correlation</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>2008-01-01</td>\n      <td>1096.000000</td>\n      <td>212.558001</td>\n      <td>300.602407</td>\n      <td>425.116001</td>\n      <td>672.167417</td>\n      <td>1096.000000</td>\n      <td>1096.000000</td>\n      <td>1096.000000</td>\n      <td>1096.000000</td>\n      <td>1096.000000</td>\n      <td>...</td>\n      <td>1096.000000</td>\n      <td>1096.000000</td>\n      <td>1096.000000</td>\n      <td>1096.000000</td>\n      <td>1096.000000</td>\n      <td>1096.000000</td>\n      <td>1.201216e+06</td>\n      <td>1096.000000</td>\n      <td>1096.000000</td>\n      <td>0.864123</td>\n    </tr>\n    <tr>\n      <td>2008-02-01</td>\n      <td>873.700000</td>\n      <td>169.445187</td>\n      <td>239.631682</td>\n      <td>338.890374</td>\n      <td>535.832730</td>\n      <td>873.700000</td>\n      <td>873.700000</td>\n      <td>873.700000</td>\n      <td>873.700000</td>\n      <td>873.700000</td>\n      <td>...</td>\n      <td>873.700000</td>\n      <td>873.700000</td>\n      <td>873.700000</td>\n      <td>873.700000</td>\n      <td>873.700000</td>\n      <td>873.700000</td>\n      <td>7.633517e+05</td>\n      <td>873.700000</td>\n      <td>873.700000</td>\n      <td>-0.272809</td>\n    </tr>\n    <tr>\n      <td>2008-03-01</td>\n      <td>1090.000000</td>\n      <td>211.394362</td>\n      <td>298.956774</td>\n      <td>422.788724</td>\n      <td>668.487668</td>\n      
<td>1090.000000</td>\n      <td>1090.000000</td>\n      <td>1090.000000</td>\n      <td>1090.000000</td>\n      <td>1090.000000</td>\n      <td>...</td>\n      <td>1090.000000</td>\n      <td>1090.000000</td>\n      <td>1090.000000</td>\n      <td>1090.000000</td>\n      <td>1090.000000</td>\n      <td>1090.000000</td>\n      <td>1.188100e+06</td>\n      <td>1090.000000</td>\n      <td>1090.000000</td>\n      <td>0.864123</td>\n    </tr>\n    <tr>\n      <td>2008-04-01</td>\n      <td>1187.000000</td>\n      <td>230.206521</td>\n      <td>325.561184</td>\n      <td>460.413042</td>\n      <td>727.976938</td>\n      <td>1187.000000</td>\n      <td>1187.000000</td>\n      <td>1187.000000</td>\n      <td>1187.000000</td>\n      <td>1187.000000</td>\n      <td>...</td>\n      <td>1187.000000</td>\n      <td>1187.000000</td>\n      <td>1187.000000</td>\n      <td>1187.000000</td>\n      <td>1187.000000</td>\n      <td>1187.000000</td>\n      <td>1.408969e+06</td>\n      <td>1187.000000</td>\n      <td>1187.000000</td>\n      <td>0.864123</td>\n    </tr>\n    <tr>\n      <td>2008-05-01</td>\n      <td>1205.000000</td>\n      <td>233.697437</td>\n      <td>330.498085</td>\n      <td>467.394874</td>\n      <td>739.016184</td>\n      <td>1205.000000</td>\n      <td>1205.000000</td>\n      <td>1205.000000</td>\n      <td>1205.000000</td>\n      <td>1205.000000</td>\n      <td>...</td>\n      <td>1205.000000</td>\n      <td>1205.000000</td>\n      <td>1205.000000</td>\n      <td>1205.000000</td>\n      <td>1205.000000</td>\n      <td>1205.000000</td>\n      <td>1.452025e+06</td>\n      <td>1205.000000</td>\n      <td>1205.000000</td>\n      <td>0.864123</td>\n    </tr>\n    <tr>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n   
   <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <td>2020-10-01</td>\n      <td>2372.000000</td>\n      <td>460.025162</td>\n      <td>650.573823</td>\n      <td>920.050324</td>\n      <td>1454.727293</td>\n      <td>2372.000000</td>\n      <td>2372.000000</td>\n      <td>2372.000000</td>\n      <td>2372.000000</td>\n      <td>2372.000000</td>\n      <td>...</td>\n      <td>2372.000000</td>\n      <td>2372.000000</td>\n      <td>2372.000000</td>\n      <td>2372.000000</td>\n      <td>2372.000000</td>\n      <td>2372.000000</td>\n      <td>5.626384e+06</td>\n      <td>2372.000000</td>\n      <td>2372.000000</td>\n      <td>0.295657</td>\n    </tr>\n    <tr>\n      <td>2020-11-01</td>\n      <td>2681.000000</td>\n      <td>519.952555</td>\n      <td>735.323954</td>\n      <td>1039.905109</td>\n      <td>1644.234348</td>\n      <td>2681.000000</td>\n      <td>2681.000000</td>\n      <td>2681.000000</td>\n      <td>2681.000000</td>\n      <td>2681.000000</td>\n      <td>...</td>\n      <td>2681.000000</td>\n      <td>2681.000000</td>\n      <td>2681.000000</td>\n      <td>2681.000000</td>\n      <td>2681.000000</td>\n      <td>2681.000000</td>\n      <td>7.187761e+06</td>\n      <td>2681.000000</td>\n      <td>2681.000000</td>\n      <td>0.295657</td>\n    </tr>\n    <tr>\n      <td>2020-12-01</td>\n      <td>2819.000000</td>\n      <td>546.716244</td>\n      <td>773.173528</td>\n      <td>1093.432489</td>\n      <td>1728.868566</td>\n      <td>2819.000000</td>\n      <td>2819.000000</td>\n      <td>2819.000000</td>\n      <td>2819.000000</td>\n      <td>2819.000000</td>\n      <td>...</td>\n      <td>2819.000000</td>\n      <td>2819.000000</td>\n      <td>2819.000000</td>\n      <td>2819.000000</td>\n      <td>2819.000000</td>\n      <td>2819.000000</td>\n      <td>7.946761e+06</td>\n      <td>2819.000000</td>\n      <td>2819.000000</td>\n      
<td>0.295657</td>\n    </tr>\n    <tr>\n      <td>2021-01-01</td>\n      <td>1735.477922</td>\n      <td>336.578209</td>\n      <td>475.993468</td>\n      <td>673.156419</td>\n      <td>1064.353752</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>...</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>3.011884e+06</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>0.864123</td>\n    </tr>\n    <tr>\n      <td>2021-02-01</td>\n      <td>1735.477922</td>\n      <td>336.578209</td>\n      <td>475.993468</td>\n      <td>673.156419</td>\n      <td>1064.353752</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>...</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>3.011884e+06</td>\n      <td>1735.477922</td>\n      <td>1735.477922</td>\n      <td>0.864123</td>\n    </tr>\n  </tbody>\n</table>\n<p>158 rows × 21 columns</p>\n</div>"},"execution_count":48}],"source":"## from tsfresh import select_features\n# 按照特征和数据label之间的相关性进行特征选择\ntrain_features_filtered = select_features(train_features,data_y[labels[0]],test_for_real_target_real_feature='kendall')\ntrain_features_filtered"},{"cell_type":"code","execution_count":49,"id":"happy-demographic","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"30FE525046F649829B1FDA30CB178B7B","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"            Current_Export\nMonth                     \n2021-02-01     1735.477922\n2021-01-01     
1735.477922\n2020-12-01     2819.000000\n2020-11-01     2681.000000\n2020-10-01     2372.000000\n...                    ...\n2008-05-01     1205.000000\n2008-04-01     1187.000000\n2008-03-01     1090.000000\n2008-02-01      873.700000\n2008-01-01     1096.000000\n\n[158 rows x 1 columns]","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Current_Export</th>\n    </tr>\n    <tr>\n      <th>Month</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>2021-02-01</td>\n      <td>1735.477922</td>\n    </tr>\n    <tr>\n      <td>2021-01-01</td>\n      <td>1735.477922</td>\n    </tr>\n    <tr>\n      <td>2020-12-01</td>\n      <td>2819.000000</td>\n    </tr>\n    <tr>\n      <td>2020-11-01</td>\n      <td>2681.000000</td>\n    </tr>\n    <tr>\n      <td>2020-10-01</td>\n      <td>2372.000000</td>\n    </tr>\n    <tr>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <td>2008-05-01</td>\n      <td>1205.000000</td>\n    </tr>\n    <tr>\n      <td>2008-04-01</td>\n      <td>1187.000000</td>\n    </tr>\n    <tr>\n      <td>2008-03-01</td>\n      <td>1090.000000</td>\n    </tr>\n    <tr>\n      <td>2008-02-01</td>\n      <td>873.700000</td>\n    </tr>\n    <tr>\n      <td>2008-01-01</td>\n      <td>1096.000000</td>\n    </tr>\n  </tbody>\n</table>\n<p>158 rows × 1 
columns</p>\n</div>"},"execution_count":49}],"source":"data_y"},{"cell_type":"code","execution_count":50,"id":"advance-palestine","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"853673D00633499D97CF0FF2266369A5","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# Expose the feature table's row labels as a regular \"index\" column so they can serve as a merge key\ntrain_fea=train_features_filtered.reset_index()"},{"cell_type":"code","execution_count":51,"id":"charming-wrestling","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"E7D738F758634BD889CE8B5CDF7110F8","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# Same for the label frame: \"Month\" becomes a regular column again\ndataaa=data_y.reset_index()"},{"cell_type":"code","execution_count":52,"id":"pressed-grove","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"98E63FEEF405484C82B17ABD161FEB4E","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# Join the selected features (keyed by \"index\") with the label column (keyed by \"Month\")\ndataTR=pd.merge(train_fea,dataaa,left_on=\"index\",right_on=\"Month\")"},{"cell_type":"code","execution_count":53,"id":"collective-supervision","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"089400A0A7DD43BC888F0EDB5E180F04","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# Placeholder rows for future months, starting at 2021-03 (month-start frequency)\n# NOTE(review): the batch loop at the end of this notebook uses periods=22 - confirm which horizon is intended\nadd_data=pd.DataFrame(pd.date_range(start='2021-03-01',periods=122,freq=\"MS\"))\nadd_data.columns=[\"index\"]"},{"cell_type":"code","execution_count":54,"id":"acute-perfume","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"D66B1FD2D1A24B10AAF85EF5A1B176AE","trusted":true,"collapsed":false,"scrolled":false},"outputs":[{"output_type":"execute_result","metadata":{},"data":{"text/plain":"       index\n0 2021-03-01\n1 2021-04-01\n2 2021-05-01\n3 2021-06-01\n4 2021-07-01","text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table 
border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>index</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>2021-03-01</td>\n    </tr>\n    <tr>\n      <td>1</td>\n      <td>2021-04-01</td>\n    </tr>\n    <tr>\n      <td>2</td>\n      <td>2021-05-01</td>\n    </tr>\n    <tr>\n      <td>3</td>\n      <td>2021-06-01</td>\n    </tr>\n    <tr>\n      <td>4</td>\n      <td>2021-07-01</td>\n    </tr>\n  </tbody>\n</table>\n</div>"},"execution_count":54}],"source":"add_data.head()"},{"cell_type":"code","execution_count":55,"id":"proprietary-decrease","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"7F20804C1FF4433E99F597015FF59EAA","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# Append the future months under the \"Month\" key. Merging them on \"index\" would park the new dates in the\n# column that the export below leaves out, so the appended rows would reach the CSV without any date.\ndataTR=pd.merge(dataTR,add_data.rename(columns={\"index\":\"Month\"}),on=\"Month\",how=\"outer\")"},{"cell_type":"code","execution_count":56,"id":"prime-keeping","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"71A8A2CC68A94422A898FEF99226261F","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"# \"index\" repeats Month for the observed rows; as the frame index it is left out by to_csv(index=False) below\ndataTR=dataTR.set_index(\"index\")"},{"cell_type":"code","execution_count":58,"id":"equivalent-cherry","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"F5774620560B4F3380E138A7C840C57D","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"dataTR.to_csv(\"./特征工程/base_Current_Export.csv\",index=False)"},{"cell_type":"markdown","id":"polar-duncan","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"C4CD44F4900E445FA4E3418B0747D9EB","trusted":true,"mdEditEnable":false},"source":"### 六、多个标签的特征处理"},{"cell_type":"code","execution_count":59,"id":"fuzzy-applicant","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"D9D85BBA16FE4474AC5E74609A667CBA","trusted":true,"collapsed":false,"scrolled":false},"outputs":[],"source":"from tsfresh.utilities.dataframe_functions import 
impute\nimport warnings\nwarnings.filterwarnings(\"ignore\")"},{"cell_type":"code","execution_count":60,"id":"prescribed-plain","metadata":{"scrolled":false,"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"57CB4FD2ECD64D8DA0A1E83101243455","trusted":true,"collapsed":false},"outputs":[{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  8.91it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——Current_Export————完成\n","name":"stdout"},{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  8.99it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——Current_Import————完成\n","name":"stdout"},{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  8.84it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——M2————完成\n","name":"stdout"},{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00, 10.67it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——M1————完成\n","name":"stdout"},{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  8.79it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——M0————完成\n","name":"stdout"},{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  8.99it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——FE_Reserve————完成\n","name":"stdout"},{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  8.88it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——Gold_Reserve————完成\n","name":"stdout"},{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  8.95it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——Fis_Current_Month_Value————完成\n","name":"stdout"},{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  
8.87it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——Nation_Current_Month————完成\n","name":"stdout"},{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  9.06it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——City_Current_Month————完成\n","name":"stdout"},{"output_type":"stream","text":"Feature Extraction: 100%|██████████| 40/40 [00:04<00:00,  9.12it/s]\n","name":"stderr"},{"output_type":"stream","text":"正在处理——Country_Current_Month————完成\n特征工程结束\n","name":"stdout"}],"source":"# Batch version of the single-label walkthrough above: build and save one feature file per label\n# Placeholder rows for 22 future months starting at 2021-03; identical for every label, so built once\nadd_data=pd.DataFrame({\"Month\":pd.date_range(start='2021-03-01',periods=22,freq=\"MS\")})\nfor label in labels:\n    # tsfresh feature extraction with column_id='Month', then impute the resulting feature table\n    fe_da=dataset1[[\"Month\",label]]\n    train_features=impute(extract_features(fe_da, column_id='Month'))\n    data_y=fe_da.set_index(\"Month\")\n    # Keep only the features selected against this label (Kendall test for real-valued feature/target pairs)\n    train_features_filtered = select_features(train_features,data_y[label],test_for_real_target_real_feature='kendall')\n    # Put the label back next to its features: the feature row labels come back as \"index\" and are matched to \"Month\"\n    train_fea=train_features_filtered.reset_index()\n    dataTR=pd.merge(train_fea,data_y.reset_index(),left_on=\"index\",right_on=\"Month\")\n    # Append the future months; they only carry a \"Month\" value, every other column is NaN\n    dataTR=pd.merge(dataTR,add_data,on=\"Month\",how=\"outer\")\n    # \"index\" repeats Month for the observed rows; as the frame index it is left out by to_csv(index=False)\n    dataTR=dataTR.set_index(\"index\")\n    dataTR.to_csv(\"./特征工程/base_\"+label+\".csv\",index=False)\n    print(\"正在处理——\"+label+\"————完成\")\nprint(\"特征工程结束\")"},{"cell_type":"code","execution_count":null,"id":"rotary-booking","metadata":{"jupyter":{},"tags":[],"slideshow":{"slide_type":"slide"},"id":"6763994D7BCB49DC8C496A26D12F0DBB","trusted":true},"outputs":[],"source":""}],"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"name":"python","mimetype":"text/x-python","nbconvert_exporter":"python","file_extension":".py","version":"3.5.2","pygments_lexer":"ipython3"}},"nbformat":4,"nbformat_minor":5}